Analysis

Author

Nikola Kovačević

Published

July 8, 2024

Introduction

This document conducts further analysis on the cleaned-up data produced by the green-up-project.qmd file.

Regions

current focus

Companies

  • after 2019 not included in the SURS data

Planned analyses: per region and per company; number of companies; type of waste per company and how many different types. A node is a company in a region (a point on a map), so we can't just focus on the headquarters (HQ) of the node. Try to figure out the connection between operator, collector and storage. Descriptive statistics for all years.

Setup

# Names of the packages this analysis needs; each one is passed to
# install_and_load() further down.
packages <- c(
  "tidyverse", "scales", "viridis", "ggplot2", "gganimate", "ggridges",
  "plotly", "htmlwidgets", "reshape2", "skimr", "echarts4r"
)

# Attach a package, installing it first when it is not available yet.
#
# pkg: name of the package as a single character string.
# Returns NULL invisibly; the function is called for its side effect of
# attaching `pkg` to the search path.
install_and_load <- function(pkg) {
  # requireNamespace() only checks availability: unlike require() it neither
  # attaches the package nor warns when the package is missing.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  # library() stops with an error if the package still cannot be attached.
  library(pkg, character.only = TRUE)
  invisible(NULL)
}

# echarts4r.maps is installed from its GitHub repository when it is missing,
# and is then attached so that a fresh install is usable in this session too.
if (!requireNamespace("echarts4r.maps", quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github('JohnCoene/echarts4r.maps')
}
library(echarts4r.maps)

# Set the working directory to the project folder.
# The path is machine-specific, so it is only applied when it exists; on other
# machines the current working directory is kept (all data paths below are
# relative, so the document must then be run from the project folder).
project_dir <- "C:/Users/kovac/Desktop/Work/Green UP Project/green-up-project"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Project folder not found, keeping current working directory: ",
    getwd()
  )
}

# Apply the function to each package in the list
lapply(packages, install_and_load)
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

[[4]]
NULL

[[5]]
NULL

[[6]]
NULL

[[7]]
NULL

[[8]]
NULL

[[9]]
NULL

[[10]]
NULL

[[11]]
NULL
# List of municipalities for each region
# Each vector below holds upper-case municipality names; its elements are
# labelled with the region name when municipality_region_mapping is built.
# Some municipalities appear under more than one spelling (for example
# "LENDAVA" and "LENDAVA - LENDVA"), so several spellings map to one region.
# NOTE(review): "DOBRONAK" is presumably an alternative name of "DOBROVNIK" -
# confirm against the source data.
POMURSKA <- c(
    "APAČE",
    "BELTINCI",
    "CANKOVA",
    "ČRENŠOVCI",
    "DOBROVNIK",
    "DOBRONAK",
    "GORNJA RADGONA",
    "GORNJI PETROVCI",
    "GRAD",
    "HODOŠ",
    "KOBILJE",
    "KRIŽEVCI",
    "KUZMA",
    "LENDAVA",
    "LENDAVA - LENDVA",
    "LJUTOMER",
    "MORAVSKE TOPLICE",
    "MURSKA SOBOTA",
    "ODRANCI",
    "PUCONCI",
    "RADENCI",
    "RAZKRIŽJE",
    "ROGAŠOVCI",
    "SVETI JURIJ OB ŠČAVNICI",
    "ŠALOVCI",
    "TIŠINA",
    "TURNIŠČE",
    "VELIKA POLANA",
    "VERŽEJ"
)

# Municipality names labelled "PODRAVSKA" in municipality_region_mapping.
# Several entries are spelling variants of the same name (abbreviated and
# unabbreviated forms of "... V SLOVENSKIH GORICAH"), so each variant gets
# its own row in the mapping.
PODRAVSKA <- c(
    "BENEDIKT",
    "CERKVENJAK",
    "CIRKULANE",
    "DESTRNIK",
    "DORNAVA",
    "DUPLEK",
    "GORIŠNICA",
    "HAJDINA",
    "HOČE-SLIVNICA",
    "JURŠINCI",
    "KIDRIČEVO",
    "KUNGOTA",
    "LENART",
    "LOVRENC NA POHORJU",
    "MAJŠPERK",
    "MAKOLE",
    "MARIBOR",
    "MARKOVCI",
    "MIKLAVŽ NA DRAVSKEM POLJU",
    "OPLOTNICA",
    "ORMOŽ",
    "PESNICA",
    "PODLEHNIK",
    "POLJČANE",
    "PTUJ",
    "RAČE-FRAM",
    "RUŠE",
    "SELNICA OB DRAVI",
    "SLOVENSKA BISTRICA",
    "SREDIŠČE OB DRAVI",
    "STARŠE",
    "SVETA ANA",
    "SVETA TROJICA V SLOV. GORICAH",
    "SVETA TROJICA V SLOVENSKIH GORICAH",
    "SV. TROJICA V SLOV. GORICAH",
    "SVETI ANDRAŽ V SLOV. GORICAH",
    "SVETI JURIJ V SLOV. GORICAH",
    "SVETI JURIJ V SLOVENSKIH GORICAH",
    "SVETI TOMAŽ",
    "ŠENTILJ",
    "TRNOVSKA VAS",
    "VIDEM",
    "ZAVRČ",
    "ŽETALE"
)

# Municipality names labelled "KOROŠKA" in municipality_region_mapping.
# NOTE(review): "PROŠKEM" does not look like a complete municipality name; it
# may be a truncated "... NA KOROŠKEM" entry - confirm against the source data
# before removing or correcting it.
KOROŠKA <- c(
    "ČRNA NA KOROŠKEM",
    "DRAVOGRAD",
    "MEŽICA",
    "MISLINJA",
    "MUTA",
    "PODVELKA",
    "PREVALJE",
    "RADLJE OB DRAVI",
    "PROŠKEM",
    "RAVNE NA KOROŠKEM",
    "RIBNICA NA POHORJU",
    "SLOVENJ GRADEC",
    "VUZENICA"
)

# Municipality names labelled "SAVINJSKA" in municipality_region_mapping.
SAVINJSKA <- c(
    "BRASLOVČE",
    "CELJE",
    "DOBJE",
    "DOBRNA",
    "GORNJI GRAD",
    "KOZJE",
    "LAŠKO",
    "LJUBNO",
    "LUČE",
    "MOZIRJE",
    "NAZARJE",
    "PODČETRTEK",
    "POLZELA",
    "PREBOLD",
    "REČICA OB SAVINJI",
    "ROGAŠKA SLATINA",
    "ROGATEC",
    "SLOVENSKE KONJICE",
    "SOLČAVA",
    "ŠENTJUR",
    "ŠMARJE PRI JELŠAH",
    "ŠMARTNO OB PAKI",
    "ŠOŠTANJ",
    "ŠTORE",
    "TABOR",
    "VELENJE",
    "VITANJE",
    "VOJNIK",
    "VRANSKO",
    "ZREČE",
    "ŽALEC"
)

# Municipality names labelled "ZASAVSKA" in municipality_region_mapping.
ZASAVSKA <- c("HRASTNIK", "LITIJA", "TRBOVLJE", "ZAGORJE OB SAVI")

# Municipality names labelled "POSAVSKA" in municipality_region_mapping.
POSAVSKA <- c(
    "BISTRICA OB SOTLI",
    "BREŽICE",
    "KOSTANJEVICA NA KRKI",
    "KRŠKO",
    "RADEČE",
    "SEVNICA"
)

# Municipality names labelled "JUGOVZHODNA SLOVENIJA" in
# municipality_region_mapping. The variable name uses a dot where the region
# label has a space.
JUGOVZHODNA.SLOVENIJA <- c(
    "ČRNOMELJ",
    "DOLENJSKE TOPLICE",
    "KOČEVJE",
    "KOSTEL",
    "LOŠKI POTOK",
    "METLIKA",
    "MIRNA",
    "MIRNA PEČ",
    "MOKRONOG-TREBELNO",
    "NOVO MESTO",
    "OSILNICA",
    "RIBNICA",
    "SEMIČ",
    "SODRAŽICA",
    "STRAŽA",
    "ŠENTJERNEJ",
    "ŠENTRUPERT",
    "ŠKOCJAN",
    "ŠMARJEŠKE TOPLICE",
    "TREBNJE",
    "ŽUŽEMBERK"
)

# Municipality names labelled "OSREDNJESLOVENSKA" in
# municipality_region_mapping.
OSREDNJESLOVENSKA <- c(
    "BOROVNICA",
    "BREZOVICA",
    "DOBREPOLJE",
    "DOBROVA-POLHOV GRADEC",
    "DOL PRI LJUBLJANI",
    "DOMŽALE",
    "GROSUPLJE",
    "HORJUL",
    "IG",
    "IVANČNA GORICA",
    "KAMNIK",
    "KOMENDA",
    "LJUBLJANA",
    "LOG-DRAGOMER",
    "LOGATEC",
    "LUKOVICA",
    "MEDVODE",
    "MENGEŠ",
    "MORAVČE",
    "ŠKOFLJICA",
    "ŠMARTNO PRI LITIJI",
    "TRZIN",
    "VELIKE LAŠČE",
    "VODICE",
    "VRHNIKA"
)

# Municipality names labelled "GORENJSKA" in municipality_region_mapping.
# Both "GORENJA VAS-POLJANE" and "GORENJA VAS" are listed, so either spelling
# is mapped to this region.
# NOTE(review): "BOHINJSKA BISTRICA" and "GOZD MARTULJEK" are presumably
# settlement names that occur in the data rather than municipalities - confirm.
GORENJSKA <- c(
    "BLED",
    "BOHINJ",
    "BOHINJSKA BISTRICA",
    "CERKLJE NA GORENJSKEM",
    "GORENJA VAS-POLJANE",
    "GORENJA VAS",
    "GORJE",
    "JESENICE",
    "JEZERSKO",
    "KRANJ",
    "KRANJSKA GORA",
    "NAKLO",
    "PREDDVOR",
    "RADOVLJICA",
    "ŠENČUR",
    "ŠKOFJA LOKA",
    "TRŽIČ",
    "ŽELEZNIKI",
    "ŽIRI",
    "ŽIROVNICA",
    "GOZD MARTULJEK"
)

# Municipality names labelled "PRIMORSKONOTRANJSKA" in
# municipality_region_mapping.
# NOTE(review): "NOVA VAS" is presumably a settlement name that occurs in the
# data rather than a municipality - confirm.
PRIMORSKONOTRANJSKA <- c(
    "BLOKE",
    "CERKNICA",
    "ILIRSKA BISTRICA",
    "LOŠKA DOLINA",
    "PIVKA",
    "POSTOJNA",
    "NOVA VAS"
)

# Municipality names labelled "GORIŠKA" in municipality_region_mapping.
GORIŠKA <- c(
    "AJDOVŠČINA",
    "BOVEC",
    "BRDA",
    "CERKNO",
    "IDRIJA",
    "KANAL",
    "KOBARID",
    "MIREN-KOSTANJEVICA",
    "NOVA GORICA",
    "RENČE-VOGRSKO",
    "ŠEMPETER-VRTOJBA",
    "TOLMIN",
    "VIPAVA"
)

# Municipality names labelled "OBALNOKRAŠKA" in municipality_region_mapping.
# "IZOLA", "KOPER" and "PIRAN" are each listed twice: once as the plain name
# and once in a two-name form (e.g. "KOPER - CAPODISTRIA"), so both spellings
# are mapped to this region.
OBALNOKRAŠKA <- c(
    "ANKARAN",
    "DIVAČA",
    "HRPELJE-KOZINA",
    "IZOLA",
    "IZOLA - ISOLA",
    "KOMEN",
    "KOPER",
    "KOPER - CAPODISTRIA",
    "PIRAN",
    "PIRAN - PIRANO",
    "SEŽANA"
)

# Placeholder values for records without a known municipality. Each one is
# mapped to a region of the same name in municipality_region_mapping.
NEOPREDELJENO <- "NEOPREDELJENO"

UNDEFINED <- "UNDEFINED"

# Lookup table with one row per municipality spelling and the statistical
# region it belongs to. The regions are collected in a named list so that each
# label stays next to its municipality vector; the list order defines the row
# order of the table.
municipality_region_mapping <- local({
    municipalities_by_region <- list(
        "POMURSKA" = POMURSKA,
        "PODRAVSKA" = PODRAVSKA,
        "KOROŠKA" = KOROŠKA,
        "SAVINJSKA" = SAVINJSKA,
        "ZASAVSKA" = ZASAVSKA,
        "POSAVSKA" = POSAVSKA,
        "JUGOVZHODNA SLOVENIJA" = JUGOVZHODNA.SLOVENIJA,
        "OSREDNJESLOVENSKA" = OSREDNJESLOVENSKA,
        "GORENJSKA" = GORENJSKA,
        "PRIMORSKONOTRANJSKA" = PRIMORSKONOTRANJSKA,
        "GORIŠKA" = GORIŠKA,
        "OBALNOKRAŠKA" = OBALNOKRAŠKA,
        "NEOPREDELJENO" = NEOPREDELJENO,
        "UNDEFINED" = UNDEFINED
    )

    data.frame(
        name_of_municipality = unlist(municipalities_by_region, use.names = FALSE),
        # Repeat every region label once per municipality in that region
        statistical_region = rep(
            names(municipalities_by_region),
            times = lengths(municipalities_by_region)
        )
    )
})

# Map raw waste-type labels to short, readable display names.
#
# name: character vector of waste-type labels.
# Returns a character vector of the same length. Labels that have no entry in
# the lookup table (including NA) are returned unchanged.
clear_waste_name <- function(name) {
    # One entry per raw label. The original case_when() listed two of these
    # conditions twice; the repeated branches could never match and were dropped.
    clear_names <- c(
        # Sawdust, chips, cuttings, wood, particle board and veneer, not specified in 03 01 04
        "Sawdust/Chips/Wood, not specified in 03 01 04" = "Sawdust and Wood Cuttings",
        "Paper and cardboard packaging and cardboard packaging" = "Paper and Cardboard Packaging",
        "Wooden packaging" = "Wooden Packaging",
        "Wood" = "Wood",
        "Wood, not specified in 20 01 37" = "Other Wood Waste",
        "Waste bark and cork" = "Bark and Cork",
        "Waste not specified elsewhere" = "Unspecified Wood Waste"
    )

    cleaned <- as.character(name)
    # match() returns NA for labels that are not in the table (and for NA input)
    position <- match(cleaned, names(clear_names))
    found <- !is.na(position)
    cleaned[found] <- unname(clear_names)[position[found]]
    cleaned
}

# Functions ----

## Summarize the waste stored at the start and at the end of the year per
## region and waste type, keeping the year column.
##
## data: data frame with the columns year, statistical_region, type_of_waste,
##       waste_stored_start_year and waste_stored_end_year.
## Returns an ungrouped data frame with one row per year, region and waste
## type. Both storage columns are summed (ignoring NA) and divided by 1000.
summarize_waste_storage <- function(data) {
    data |>
        group_by(year, statistical_region, type_of_waste) |>
        summarize(
            waste_stored_start_year = sum(waste_stored_start_year, na.rm = TRUE) / 1000,
            waste_stored_end_year = sum(waste_stored_end_year, na.rm = TRUE) / 1000,
            # Drop the grouping explicitly: same result as a trailing ungroup(),
            # but without the "has grouped output by" message on every call.
            .groups = "drop"
        )
}

# Theme ----

# Shared plot theme: theme_minimal() with the x-axis tick labels rotated by
# 45 degrees and right-aligned
my_theme <- local({
    rotated_x_labels <- element_text(angle = 45, hjust = 1)
    theme_minimal() + theme(axis.text.x = rotated_x_labels)
})

Generation Data Analysis Process

Data Loading and Preparation

  • Data from 2016 to 2023 was loaded from separate CSV files for each year.
  • The 2016–2018 data required special handling due to its different structure:
    • Two separate files were used for each of these years, e.g. for 2018: generation/waste_generation_18.csv and generation/waste_management_18.csv.
    • These files were joined based on their common columns (region and waste type).

Data Standardization

  • Column names were standardized across all years to ensure consistency.
  • A year column was added to the 2018 data to match the structure of other years.

Data Transformation

  • For 2018 Data:
    • The delivered_to column was transformed into separate columns for different types of waste transfer.
    • A total_waste_stored column was created by summing waste_stored_start_year and waste_generated.
  • For All Years:
    • The clear_waste_name function was applied to standardize waste type names.

Data Combination

  • The column names of all years were aligned with the 2022 data, and the data from 2016 to 2023 was combined using bind_rows().
  • The processed 2016–2018 data is part of this combined dataset.

Analysis and Visualization

  • Total waste generation by year was calculated and visualized.
  • Waste generation was analyzed by region and year.
  • Waste generation was analyzed by waste type and year.
  • Waste transferred for treatment and waste stored at the end of the year were analyzed separately.

Key Data Transformations

  • Grouping: Data was frequently grouped by year, region, or waste type to allow for aggregate analysis.
  • Sum and Summarize: Within groups, waste quantities were summed to get total amounts.
  • Unit Conversion: Waste amounts were often divided by 1000 to convert from kilograms to tons.

Visualization Techniques

  • Line plots were used to show trends over time.
  • Interactive plots were created using ggplotly() for more detailed exploration.

Code

g.2023.data <- read_csv("2023 data/2023 filtered/generation/generation_23.csv")
Rows: 59 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated_stored, temporarily_stored_start_year, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2022.data <- read_csv("2022 data/2022 filtered/generation/generation_22.csv")
Rows: 60 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated_stored, temporarily_stored_start_year, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2021.data <- read_csv("2021 data/2021 filtered/generation/generation_21.csv")
Rows: 64 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated_stored, temporarily_stored_start_year, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2020.data <- read_csv("2020 data/2020 filtered/generation/generation_20.csv")
Rows: 59 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated, temporary_stored_start_year, waste_gen...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2019.data <- read_csv("2019 data/2019 filtered/generation/generation_19.csv")
Rows: 61 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated, temporary_stored_start_year, waste_gen...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2018.data.1 <- read_csv("2018 data/2018 filtered/generation/waste_generation_18.csv")
Rows: 66 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): waste_stored_start_year, waste_generated, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2018.data.2 <- read_csv("2018 data/2018 filtered/generation/waste_management_18.csv")
Rows: 79 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, delivered_to
dbl (1): total_waste_given_away

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2017.data.1 <- read_csv("2017 data/2017 filtered/generation/waste_generation_17.csv")
Rows: 63 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (4): total_waste_2017, waste_stored_start_year, total_waste_generated, w...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2017.data.2 <- read_csv("2017 data/2017 filtered/generation/waste_management_17.csv")
Rows: 86 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, management
dbl (2): total_waste_given_away, year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2016.data.1 <- read_csv("2016 data/2016 filtered/generation/waste_generation_16.csv")
Rows: 66 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_generated, waste_stored_start_year, waste_generat...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
g.2016.data.2 <- read_csv("2016 data/2016 filtered/generation/waste_management_16.csv")
Rows: 92 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, management
dbl (2): year, total_waste_given_away

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reshape the 2018 management table: split total_waste_given_away into one
# column per destination, then collapse to a single row per statistical region
# and waste type.
g.2018.data.2 <- g.2018.data.2 |>
    mutate(
        # A row contributes its amount only to the column that matches its
        # delivered_to value, and 0 to the other two.
        waste_handed_to_others_RS = ifelse(
            delivered_to == "processing operator in RS",
            total_waste_given_away,
            0
        ),
        waste_sent_to_EU =
            ifelse(delivered_to == "other EU country", total_waste_given_away, 0),
        waste_treated_by_producer =
            ifelse(
                delivered_to == "processed the waste themselves",
                total_waste_given_away,
                0
            )
    ) |>
    dplyr::select(-delivered_to, -total_waste_given_away) |>
    group_by(statistical_region, type_of_waste) |>
    summarize(
        waste_handed_to_others_RS = sum(waste_handed_to_others_RS, na.rm = TRUE),
        waste_sent_to_EU = sum(waste_sent_to_EU, na.rm = TRUE),
        # na.rm = TRUE like the two columns above, so that one missing amount
        # cannot turn a whole group total into NA.
        waste_treated_by_producer = sum(waste_treated_by_producer, na.rm = TRUE)
    ) |>
    ungroup()
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Build the 2018 generation table in one pass:
#   1. attach the management totals to the generation rows, matching on
#      statistical region and waste type,
#   2. add the year and the total_waste_stored column
#      (stored at the start of the year + generated),
#   3. keep the columns in a fixed order.
g.2018.data <- g.2018.data.1 |>
    left_join(g.2018.data.2, by = c("statistical_region", "type_of_waste")) |>
    mutate(
        year = 2018,
        total_waste_stored = waste_stored_start_year + waste_generated
    ) |>
    select(
        year, statistical_region, type_of_waste,
        total_waste_stored, waste_stored_start_year, waste_generated,
        waste_stored_end_year,
        waste_treated_by_producer, waste_handed_to_others_RS, waste_sent_to_EU
    )

skim(g.2018.data)
Data summary
Name g.2018.data
Number of rows 66
Number of columns 10
_______________________
Column type frequency:
character 2
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
statistical_region 0 1 7 21 0 13 0
type_of_waste 0 1 4 45 0 7 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 2018.00 0.00 2018.00 2018.00 2018.00 2018.00 2018.00 ▁▁▇▁▁
total_waste_stored 0 1 1786.22 3737.43 0.01 111.46 466.58 1840.53 26437.35 ▇▁▁▁▁
waste_stored_start_year 0 1 4.62 14.03 0.00 0.00 0.05 2.82 106.00 ▇▁▁▁▁
waste_generated 0 1 1781.60 3734.31 0.01 111.23 440.81 1836.77 26422.29 ▇▁▁▁▁
waste_stored_end_year 0 1 28.96 197.22 0.00 0.00 0.50 3.60 1602.25 ▇▁▁▁▁
waste_treated_by_producer 0 1 14.58 60.04 0.00 0.00 0.00 0.00 400.00 ▇▁▁▁▁
waste_handed_to_others_RS 0 1 1715.03 3704.83 0.01 106.86 387.44 1757.67 26202.33 ▇▁▁▁▁
waste_sent_to_EU 0 1 27.52 184.25 0.00 0.00 0.00 0.00 1466.09 ▇▁▁▁▁
g.2017.data.2$management |> unique()
[1] "given to another collector in RS"             
[2] "own processing (OVD for processing own waste)"
[3] "sent to another EU country"                   
# Reshape the 2017 management table: split total_waste_given_away into one
# column per management category, then collapse to a single row per
# statistical region and waste type.
g.2017.data.2 <- g.2017.data.2 |>
    mutate(
        # A row contributes its amount only to the column that matches its
        # management value, and 0 to the other two.
        waste_handed_to_others_RS = ifelse(
            management == "given to another collector in RS" ,
            total_waste_given_away,
            0
        ),
        waste_sent_to_EU =
            ifelse(management == "sent to another EU country", total_waste_given_away, 0),
        waste_treated_by_producer =
            ifelse(
                management == "own processing (OVD for processing own waste)",
                total_waste_given_away,
                0
            )
    ) |>
    select(-management, -total_waste_given_away) |>
    group_by(statistical_region, type_of_waste) |>
    summarize(
        waste_handed_to_others_RS = sum(waste_handed_to_others_RS, na.rm = TRUE),
        waste_sent_to_EU = sum(waste_sent_to_EU, na.rm = TRUE),
        # na.rm = TRUE like the two columns above, so that one missing amount
        # cannot turn a whole group total into NA.
        waste_treated_by_producer = sum(waste_treated_by_producer, na.rm = TRUE)
    ) |>
    ungroup()
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Build the 2017 generation table: attach the management totals to the
# generation rows (matched on statistical region and waste type), set the
# year, and keep the columns in a fixed order.
g.2017.data <- g.2017.data.1 |>
    left_join(g.2017.data.2, by = c("statistical_region", "type_of_waste")) |>
    mutate(year = 2017) |>
    select(
        year, statistical_region, type_of_waste,
        total_waste_2017, waste_stored_start_year, total_waste_generated,
        waste_stored_end_year,
        waste_treated_by_producer, waste_handed_to_others_RS, waste_sent_to_EU
    )

# Reshape the 2016 management table: split total_waste_given_away into one
# column per management category, then collapse to a single row per
# statistical region and waste type.
g.2016.data.2 <- g.2016.data.2 |>
    mutate(
        # A row contributes its amount only to the column that matches its
        # management value, and 0 to the other two.
        waste_handed_to_others_RS = ifelse(
            management == "given to another collector in RS" ,
            total_waste_given_away,
            0
        ),
        waste_sent_to_EU =
            ifelse(management == "sent to another EU country", total_waste_given_away, 0),
        waste_treated_by_producer =
            ifelse(
                management == "own processing (OVD for processing own waste)",
                total_waste_given_away,
                0
            )
    ) |>
    select(-management, -total_waste_given_away) |>
    group_by(statistical_region, type_of_waste) |>
    summarize(
        waste_handed_to_others_RS = sum(waste_handed_to_others_RS, na.rm = TRUE),
        waste_sent_to_EU = sum(waste_sent_to_EU, na.rm = TRUE),
        # na.rm = TRUE like the two columns above, so that one missing amount
        # cannot turn a whole group total into NA.
        waste_treated_by_producer = sum(waste_treated_by_producer, na.rm = TRUE)
    ) |>
    ungroup()
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Build the 2016 generation table from the generation rows and the management
# totals, matched on statistical region and waste type.
g.2016.data <- g.2016.data.1 |>
    left_join(g.2016.data.2, by = c("statistical_region", "type_of_waste")) |>
    mutate(year = 2016) |>
    # The columns are renamed BY POSITION further down, so their order matters.
    # The 2017 and 2018 tables order the management columns as
    # treated_by_producer, handed_to_others_RS, sent_to_EU; without this step
    # the 2016 table would keep the join order (RS, EU, producer) and its
    # columns would end up under the wrong names.
    # NOTE(review): the remaining columns keep the order of the 2016 CSV -
    # confirm that it matches the column order of g.2022.data.
    relocate(waste_treated_by_producer, .before = waste_handed_to_others_RS) |>
    # Region/type combinations without a management row get NA from the join;
    # treat them as 0. Only numeric columns are filled so that text columns
    # are never overwritten with "0".
    mutate(across(where(is.numeric), \(x) replace(x, is.na(x), 0))) |>
    mutate(waste_sent_for_treatment_outside_EU = 0)

# Add a zero-filled waste_sent_for_treatment_outside_EU column to the 2017 and
# 2018 tables so that they have the same number of columns as the other years.
g.2017.data <- g.2017.data |>
    mutate(waste_sent_for_treatment_outside_EU = 0)

g.2018.data <- g.2018.data |>
    mutate(waste_sent_for_treatment_outside_EU = 0)

# Every year takes over the 2022 column names BY POSITION. Check the column
# counts first: a table with a different number of columns would otherwise be
# renamed incorrectly or fail with a less helpful error.
generation_column_names <- colnames(g.2022.data)
stopifnot(
    ncol(g.2016.data) == length(generation_column_names),
    ncol(g.2017.data) == length(generation_column_names),
    ncol(g.2018.data) == length(generation_column_names),
    ncol(g.2019.data) == length(generation_column_names),
    ncol(g.2020.data) == length(generation_column_names),
    ncol(g.2021.data) == length(generation_column_names),
    ncol(g.2023.data) == length(generation_column_names)
)

names(g.2016.data) <- generation_column_names
names(g.2017.data) <- generation_column_names
names(g.2018.data) <- generation_column_names
names(g.2019.data) <- generation_column_names
names(g.2020.data) <- generation_column_names
names(g.2021.data) <- generation_column_names
names(g.2023.data) <- generation_column_names

# Stack all years into one table
combined_data <- bind_rows(
    g.2016.data,
    g.2017.data,
    g.2018.data,
    g.2019.data,
    g.2020.data,
    g.2021.data,
    g.2022.data,
    g.2023.data
)

# Replace the raw waste-type labels with their cleaned display names
combined_data$type_of_waste <- clear_waste_name(combined_data$type_of_waste)

# Save the combined generation table in the Interface/Visualization data folder
write_csv(
    x = combined_data,
    file = "Interface/Visualization/data/gnr_combined.csv"
)

# Yearly total of generated_in_the_year over all regions and waste types
total_waste_by_year <- combined_data |>
    group_by(year) |>
    summarise(
        total_generated_waste = sum(generated_in_the_year, na.rm = TRUE)
    )

# Plot 1: line and points of the yearly total, one x-axis tick per year
t <- total_waste_by_year |>
    ggplot(mapping = aes(x = year, y = total_generated_waste)) +
    geom_line(group = 1) +
    geom_point() +
    scale_x_continuous(breaks = unique(total_waste_by_year$year)) +
    labs(
        x = "Year",
        y = "Total Generated Waste (tons)",
        title = "Total Wood Waste Generation by Year"
    ) +
    my_theme

# Same total, split by statistical region (input for plot 2)
waste_by_region_year <- combined_data |>
    group_by(statistical_region, year) |>
    summarise(
        total_generated_waste = sum(generated_in_the_year, na.rm = TRUE)
    ) |>
    ungroup()
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Plot 2: yearly generated waste, one coloured line per statistical region
t2 <- waste_by_region_year |>
    ggplot(
        mapping = aes(
            x = year,
            y = total_generated_waste,
            color = statistical_region
        )
    ) +
    geom_line() +
    geom_point() +
    scale_x_continuous(breaks = unique(waste_by_region_year$year)) +
    labs(
        x = "Year",
        y = "Total Generated Waste (tons)",
        title = "Total Wood Waste Generation by Region and Year"
    ) +
    my_theme

# Yearly generated waste split by waste type (input for plot 3)
waste_by_type_year <- combined_data |>
    group_by(type_of_waste, year) |>
    summarise(
        total_generated_waste = sum(generated_in_the_year, na.rm = TRUE)
    ) |>
    ungroup()
`summarise()` has grouped output by 'type_of_waste'. You can override using the
`.groups` argument.
# Plot 3: yearly generated waste, one coloured line per waste type
t3 <- waste_by_type_year |>
    ggplot(
        mapping = aes(
            x = year,
            y = total_generated_waste,
            color = type_of_waste
        )
    ) +
    geom_line() +
    geom_point() +
    scale_x_continuous(breaks = unique(waste_by_type_year$year)) +
    labs(
        x = "Year",
        y = "Total Generated Waste (tons)",
        title = "Total Wood Waste Generation by Type and Year"
    ) +
    my_theme

# Yearly total of waste_transferred_for_treatment_in_RS (input for plot 4)
waste_transferred <- combined_data |>
    group_by(year) |>
    summarise(
        total_sent = sum(waste_transferred_for_treatment_in_RS, na.rm = TRUE)
    ) |>
    ungroup()

# Plot 4: yearly total of waste transferred for treatment
t4 <- waste_transferred |>
    ggplot(mapping = aes(x = year, y = total_sent)) +
    geom_line(group = 1) +
    geom_point() +
    scale_x_continuous(breaks = unique(waste_transferred$year)) +
    labs(
        x = "Year",
        y = "Total Waste Transferred (tons)",
        title = "Total Wood Waste Transferred for Treatment by Year"
    ) +
    my_theme

# Yearly total of temporarily_stored_end_year (input for plot 5)
waste_stored_at_the_end_year <- combined_data |>
    group_by(year) |>
    summarise(
        total_stored = sum(temporarily_stored_end_year, na.rm = TRUE)
    ) |>
    ungroup()

# Plot 5: yearly total of waste temporarily stored at the end of the year
t5 <- waste_stored_at_the_end_year |>
    ggplot(mapping = aes(x = year, y = total_stored)) +
    geom_line(group = 1) +
    geom_point() +
    scale_x_continuous(breaks = unique(waste_stored_at_the_end_year$year)) +
    labs(
        x = "Year",
        y = "Total Waste Stored (tons)",
        title = "Total Wood Waste Stored (Temporarily) at the End of the Year"
    ) +
    my_theme

# Render the five plots as interactive plotly widgets, all at the same size
ggplotly(p = t, width = 1000, height = 500)
ggplotly(p = t2, width = 1000, height = 500)
ggplotly(p = t3, width = 1000, height = 500)
ggplotly(p = t4, width = 1000, height = 500)
ggplotly(p = t5, width = 1000, height = 500)

Collection Data Analysis Process

Waste Storage

  1. Data Loading:
    • The code starts by reading in waste storage data from multiple years (2016-2022) into separate dataframes.
  2. Data Preparation:
    • The column names of the 2021 and 2022 data are renamed to match the 2016 data, ensuring consistency across the datasets.
  3. Data Combination:
    • The data is combined into a single dataframe called combined_storage_data using bind_rows().
  4. Data Reshaping:
    • To prepare the data for visualization, the code reshapes the data using melt(), creating a long-form dataset with the following columns:
      • statistical_region
      • type_of_waste
      • year
      • storage_time (start or end of year)
      • total_storage
  5. Visualization 1 - Grouped Bar Plot:
    • The first visualization (t6) is a grouped bar plot showing the total waste stored at the start and end of each year, grouped by statistical region.
    • This visualization allows the user to see:
      • Differences in waste storage levels across regions.
      • Changes in waste storage within each year.
  6. Visualization 2 - Line Plot:
    • The second visualization (t7) is a line plot showing the total waste stored at the start and end of each year over time.
    • This provides an overview of how waste storage has changed over the years, both at the start and end of each year.

Code

# waste.storage.2023.data <- read_csv("2023 data/2023 filtered/collection/waste_received_23.csv")
waste.storage.2022.data <- read_csv("2022 data/2022 filtered/collection/waste_storage_22.csv")
Rows: 42 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, previous_stored_start_year, previous_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.storage.2021.data <- read_csv("2021 data/2021 filtered/collection/waste_storage_21.csv")
Rows: 38 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, previous_stored_start_year, previous_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.storage.2020.data <- read_csv("2020 data/2020 filtered/collection/waste_storage_20.csv")
Rows: 71 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.storage.2019.data <- read_csv("2019 data/2019 filtered/collection/waste_storage_19.csv")
Rows: 39 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.storage.2018.data <- read_csv("2018 data/2018 filtered/collection/waste_storage_18.csv")
Rows: 34 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.storage.2017.data <- read_csv("2017 data/2017 filtered/collection/waste_storage_17.csv")
Rows: 39 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.storage.2016.data <- read_csv("2016 data/2016 filtered/collection/waste_storage_16.csv")
Rows: 39 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The 2021 and 2022 storage tables take over the 2016 column names (by
# position) so that all years line up when they are stacked.
colnames(waste.storage.2021.data) <- names(waste.storage.2016.data)
colnames(waste.storage.2022.data) <- names(waste.storage.2016.data)

# Stack the storage tables of 2016-2022 into one table
combined_storage_data <- bind_rows(
    waste.storage.2016.data,
    waste.storage.2017.data,
    waste.storage.2018.data,
    waste.storage.2019.data,
    waste.storage.2020.data,
    waste.storage.2021.data,
    waste.storage.2022.data
)

# Replace the raw waste-type labels with their cleaned display names
combined_storage_data$type_of_waste <-
    clear_waste_name(combined_storage_data$type_of_waste)

# Save the combined storage table in the Interface/Visualization data folder
write_csv(
    x = combined_storage_data,
    file = "Interface/Visualization/data/coll_storage_combined.csv"
)

# Long format for plotting: one row per region, waste type, year and storage
# time. storage_time holds the name of the source column and total_storage
# holds its value.
df_long <- melt(
    data = combined_storage_data,
    id.vars = c("statistical_region", "type_of_waste", "year"),
    measure.vars = c("waste_stored_start_year", "waste_stored_end_year"),
    variable.name = "storage_time",
    value.name = "total_storage"
)

# Grouped bar plot: stored waste per statistical region, at the start and at
# the end of the year.
# df_long has several rows per region and storage time (one per year and waste
# type). With position = "dodge" those rows were drawn on top of each other, so
# each bar showed only the largest single row rather than a total. The rows are
# therefore summed first; each bar is the total over all years and waste types.
t6 <- df_long |>
    group_by(statistical_region, storage_time) |>
    summarize(total_storage = sum(total_storage, na.rm = TRUE), .groups = "drop") |>
    ggplot(aes(x = statistical_region, y = total_storage, fill = storage_time)) +
    geom_col(position = "dodge") +
    labs(
        title = "Waste Storage at Start and End of the Year",
        x = "Statistical Region", y = "Total Stored Waste"
    ) +
    scale_fill_manual(
        values = c("waste_stored_start_year" = "blue", "waste_stored_end_year" = "red"),
        labels = c("Start of Year", "End of Year")
    ) +
    # Horizontal bars leave room for the region names
    coord_flip() +
    theme_minimal()

ggplotly(t6, width = 1000, height = 500)
# Stored waste summed over all regions and waste types, one row per year
yearly_data <- combined_storage_data |>
  group_by(year) |>
  summarise(
    total_start = sum(waste_stored_start_year, na.rm = TRUE),
    total_end = sum(waste_stored_end_year, na.rm = TRUE)
  )

# Two series (start / end of year), each drawn as points plus a line. The
# colour aesthetic is a constant label, which becomes the legend entry.
t7 <- ggplot(yearly_data, aes(x = year)) +
  list(
    geom_point(aes(y = total_start, color = "Start of Year")),
    geom_point(aes(y = total_end, color = "End of Year")),
    geom_line(aes(y = total_start, color = "Start of Year")),
    geom_line(aes(y = total_end, color = "End of Year"))
  ) +
  labs(
    title = "Total Stored Waste Over Time",
    y = "Total Stored Waste",
    color = "Time of Year"
  ) +
  theme_minimal()

ggplotly(t7, width = 1000, height = 500)
# Stored waste per waste type and year: totals at the start and at the end of
# the year, returned as an ungrouped table.
type_data <- combined_storage_data |>
  group_by(type_of_waste, year) |>
  summarise(
    total_start = sum(waste_stored_start_year, na.rm = TRUE),
    total_end = sum(waste_stored_end_year, na.rm = TRUE)
  ) |>
  ungroup()
`summarise()` has grouped output by 'type_of_waste'. You can override using the
`.groups` argument.
# Long form of type_data: one row per waste type / year / time of year.
# This lets waste type and time of year be mapped to separate aesthetics.
type_data_long <- type_data |>
  pivot_longer(
    cols = c(total_start, total_end),
    names_to = "time_of_year",
    values_to = "total_stored"
  ) |>
  mutate(
    time_of_year = factor(
      time_of_year,
      levels = c("total_start", "total_end"),
      labels = c("Start of Year", "End of Year")
    )
  )

# Stored waste by type over time. Colour identifies the waste type and line
# type separates start-of-year from end-of-year. Previously every layer
# replaced the colour mapping with a constant label, so the waste type was
# never shown and all types were joined into one zig-zag line per series.
t8 <- ggplot(
  type_data_long,
  aes(
    x = year, y = total_stored,
    color = type_of_waste, linetype = time_of_year,
    # one line per waste type and time of year
    group = interaction(type_of_waste, time_of_year)
  )
) +
  geom_point() +
  geom_line() +
  labs(
    title = "Total Stored Waste by Type Over Time",
    y = "Total Stored Waste",
    color = "Type of Waste",
    linetype = "Time of Year"
  ) +
  theme_minimal()

ggplotly(t8, width = 1000, height = 500)
# outlier because of reporting or because of effects of COVID or something completely else?

Waste Received

  1. Data Loading:
    • Waste collection data from the years 2016 to 2023 is loaded into separate dataframes.
  2. Data Preparation:
    • The column names for the 2019, 2020, and 2021 data are renamed to match the 2022 data, ensuring consistency across all datasets.
  3. Data Combination:
    • The data from all years is combined into a single dataframe called combined_received_data using bind_rows().
  4. Data Reshaping:
    • The data is reshaped into a long-form format using the melt() function, resulting in columns for:
      • statistical_region
      • type_of_waste
      • year
      • source (indicating the source of the waste)
      • total_collected (indicating the amount of waste collected)
  5. Visualization 1 - Grouped Bar Plot:
    • A grouped bar plot (t8_grouped) is created to show the total waste collected from different sources, grouped by statistical region. This plot uses different colors to represent each waste source.
  6. Visualization 2 - Stacked Bar Plot:
    • A stacked bar plot (t8_stacked) is created to display the total waste collected by source for each statistical region, with the four sources stacked on top of each other. The coord_flip() call is currently commented out, so the bars are drawn vertically.
  7. Visualization 3 - Line Plot:
    • A line plot (t9) is generated to show how the amount of waste collected from different sources has changed over time. The plot includes lines representing waste from:
      • Producers (No Record)
      • Producers (With Record)
      • Collectors (RS)
      • Processors (RS)
    • This provides a visual trend of waste collection across years.
  8. Interactive Plots:
    • The ggplotly() function is used to make the grouped bar plot, stacked bar plot, and line plot interactive, allowing for more detailed exploration of the data.

Code

waste.received.2016.data <- read_csv("2016 data/2016 filtered/collection/waste_collected_16.csv")
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2017.data <- read_csv("2017 data/2017 filtered/collection/waste_collected_17.csv")
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2018.data <- read_csv("2018 data/2018 filtered/collection/waste_collected_18.csv")
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2019.data <- read_csv("2019 data/2019 filtered/collection/waste_collected_19.csv")
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, waste_received, waste_from_producers_no_record, waste_from_pr...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2020.data <- read_csv("2020 data/2020 filtered/collection/waste_received_20.csv")
Rows: 52 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_received_from_producers_no_list,...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2021.data <- read_csv("2021 data/2021 filtered/collection/waste_received_21.csv")
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_collected_waste, waste_from_producers_no_record, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2022.data <- read_csv("2022 data/2022 filtered/collection/waste_received_22.csv")
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.received.2023.data <- read_csv("2023 data/2023 filtered/collection/waste_received_23.csv")
Rows: 54 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_collected, total_waste_collected_including_wareho...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the 2019-2021 tables the 2022 header. The relabelling is positional,
# so it relies on each file listing its columns in the same order as 2022.
waste.received.2019.data <- setNames(
  waste.received.2019.data,
  names(waste.received.2022.data)
)
waste.received.2020.data <- setNames(
  waste.received.2020.data,
  names(waste.received.2022.data)
)
waste.received.2021.data <- setNames(
  waste.received.2021.data,
  names(waste.received.2022.data)
)

# The 2023 file has extra columns; drop every column whose name contains
# "warehouse" so that it lines up with the earlier years
waste.received.2023.data <- select(
  waste.received.2023.data,
  -contains("warehouse")
)

# Stack all years (2016-2023) and clean the waste type labels
combined_received_data <- list(
  waste.received.2016.data,
  waste.received.2017.data,
  waste.received.2018.data,
  waste.received.2019.data,
  waste.received.2020.data,
  waste.received.2021.data,
  waste.received.2022.data,
  waste.received.2023.data
) |>
  bind_rows() |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# Save the combined received-waste table into the visualization data folder
write_csv(
  combined_received_data,
  "Interface/Visualization/data/coll_received_combined.csv"
)

# Long form for plotting: one row per region / waste type / year / source.
# total_waste_collected is deliberately not among the measured columns.
df_long_received <- melt(
  combined_received_data,
  id.vars = c("statistical_region", "type_of_waste", "year"),
  measure.vars = c(
    "waste_from_producers_no_record",
    "waste_from_producers_with_record",
    "waste_from_collectors_RS",
    "waste_from_processors_RS"
  ),
  variable.name = "source",
  value.name = "total_collected"
)

# Regional totals over the four sources, largest region first
total_collected_per_region <- df_long_received |>
  group_by(statistical_region) |>
  summarise(total_collected = sum(total_collected, na.rm = TRUE)) |>
  arrange(desc(total_collected))

# Turn the region into a factor whose level order follows the ranking above,
# so that plots list the regions from largest to smallest total
df_long_received[["statistical_region"]] <- factor(
  df_long_received[["statistical_region"]],
  levels = total_collected_per_region[["statistical_region"]]
)

# Colorblind-friendly palette (Okabe-Ito), keyed by source column name
color_palette <- c(
  waste_from_producers_no_record = "#E69F00",   # Orange
  waste_from_producers_with_record = "#56B4E9", # Sky blue
  waste_from_collectors_RS = "#009E73",         # Green
  waste_from_processors_RS = "#F0E442"          # Yellow
)

# Total received waste per region and source (all years and waste types).
# The bar plots below need one row per bar. geom_bar(stat = "identity",
# position = "dodge") does not add up rows that share a region and a source;
# it draws them on top of each other, so the visible bar would be the largest
# single row instead of the total. group_by() keeps the factor level order of
# statistical_region and source.
received_by_source <- df_long_received |>
  group_by(statistical_region, source) |>
  summarise(
    total_collected = sum(total_collected, na.rm = TRUE),
    .groups = "drop"
  )

# Legend labels keyed by source name, so each label follows its own series
source_labels <- c(
  "waste_from_producers_no_record" = "From Producers (No Record)",
  "waste_from_producers_with_record" = "From Producers (With Record)",
  "waste_from_collectors_RS" = "From Collectors (RS)",
  "waste_from_processors_RS" = "From Processors (RS)"
)

# Titles and fill scale shared by the three plots
received_labs <- labs(
  title = "Waste Received by Source",
  x = "Statistical Region", y = "Total Collected Waste"
)
received_fill <- scale_fill_manual(
  values = color_palette,
  labels = source_labels
)

# Stacked bar plot: one bar per region, split by source
t8_stacked <- ggplot(
  received_by_source,
  aes(x = statistical_region, y = total_collected, fill = source)
) +
  geom_col() +
  received_labs +
  received_fill +
  # coord_flip() +
  my_theme

# Faceted bar plot: one panel per source, each with its own y scale
t8_faceted <- ggplot(
  received_by_source,
  aes(x = statistical_region, y = total_collected, fill = source)
) +
  geom_col(position = "dodge") +
  facet_wrap(~ source, ncol = 1, scales = "free_y") +
  received_labs +
  received_fill +
  my_theme

# Grouped bar plot: the four sources side by side within each region
t8_grouped <- ggplot(
  received_by_source,
  aes(x = statistical_region, y = total_collected, fill = source)
) +
  geom_col(position = "dodge") +
  received_labs +
  received_fill +
  my_theme

ggplotly(t8_stacked, width = 1000, height = 1000)
ggplotly(t8_faceted, width = 1000, height = 1000)
ggplotly(t8_grouped, width = 1000, height = 1000)
# Yearly totals over all regions and waste types: the reported overall total
# plus one column per source
yearly_data_received <- combined_received_data |>
  group_by(year) |>
  summarise(
    total_collected = sum(total_waste_collected, na.rm = TRUE),
    from_producers_no_record = sum(waste_from_producers_no_record, na.rm = TRUE),
    from_producers_with_record = sum(waste_from_producers_with_record, na.rm = TRUE),
    from_collectors_RS = sum(waste_from_collectors_RS, na.rm = TRUE),
    from_processors_RS = sum(waste_from_processors_RS, na.rm = TRUE)
  )

# One point + line series per source. The colour aesthetic is a constant
# label, which becomes the legend entry. Every year present gets an axis tick.
t9 <- ggplot(yearly_data_received, aes(x = year)) +
  list(
    geom_point(aes(y = from_producers_no_record, color = "From Producers (No Record)")),
    geom_line(aes(y = from_producers_no_record, color = "From Producers (No Record)")),
    geom_point(aes(y = from_producers_with_record, color = "From Producers (With Record)")),
    geom_line(aes(y = from_producers_with_record, color = "From Producers (With Record)")),
    geom_point(aes(y = from_collectors_RS, color = "From Collectors (RS)")),
    geom_line(aes(y = from_collectors_RS, color = "From Collectors (RS)")),
    geom_point(aes(y = from_processors_RS, color = "From Processors (RS)")),
    geom_line(aes(y = from_processors_RS, color = "From Processors (RS)"))
  ) +
  labs(
    title = "Waste Collected by Source Over Time",
    y = "Waste Collected",
    color = "Source"
  ) +
  my_theme +
  scale_x_continuous(breaks = unique(yearly_data_received[["year"]]))

ggplotly(t9, width = 1000, height = 500)

Municipal Waste Collected

Problem Statement:

  • Challenge: The code is attempting to create location data for companies based on 2018 data. The 2018 dataset contains address information within the name_of_company column. However, it is not guaranteed that the address from 2018 is the same for the company in 2019.
  • Assumption: For this analysis, the code assumes that the address for a company in 2019 is the same as in 2018.

Data Processing Steps:

  1. Data Loading:
    • The municipal waste collection data from 2018 to 2023 is loaded into separate dataframes.
  2. Data Preparation:
    • The column names in the 2018 data are aligned with those in the 2019 data for consistency.
    • The waste type names in the datasets for all years are cleaned using a function clear_waste_name().
  3. Municipality of Origin Determination:
    • For the 2018 data, the company name is separated from the full address, and the municipality name is extracted from the address. The extracted address and municipality name are added as new columns to the dataframe.
  4. Address and Municipality Matching:
    • The 2018 municipality and address data is joined with the 2019 data based on the name_of_company field.
    • For cases where municipality or address data is missing in 2019, “UNDEFINED” is used as a placeholder.
  5. Statistical Region Mapping:
    • The municipality_region_mapping dataset is used to map each municipality to its corresponding statistical region for both 2018 and 2019 data.
  6. Data Correction:
    • The names of certain municipalities (e.g., “IZOLA - ISOLA”, “KOPER - CAPODISTRIA”) are standardized in both the 2018 and 2019 datasets.
    • The address, municipality name, and statistical region for specific companies (e.g., “KOMUNALA RADGONA, javno podjetje d.o.o.”) are manually corrected in the 2019 data.
  7. Data Aggregation:
    • The datasets for each year are grouped by year, statistical_region, name_of_municipality, and type_of_waste, and the total waste collected is summarized.
  8. Data Combination:
    • The processed datasets from 2018 to 2023 are combined into a single dataframe, combined_municipal_data.
  9. Data Reshaping:
    • The combined data is reshaped into a long format suitable for plotting using melt(), with columns for year, statistical_region, name_of_municipality, type_of_waste, and total_collected.

Visualizations:

  1. Total Waste Collected by Municipality and Region (2018-2022):
    • A line plot (t10) is created to show the total waste collected by municipality and region from 2018 to 2022, with separate lines for each type of waste. The plot is faceted by statistical region.
  2. Top 5 Municipalities by Total Waste Collected (2018-2022):
    • The top 5 municipalities based on total waste collected are identified.
    • A line plot (t11) is generated to visualize the total waste collected over the years for these top 5 municipalities, with separate lines for each municipality and waste type.

Code

waste.management.2018.data <- read_csv("2018 data/2018 filtered/collection/municipal_waste_collected_18.csv")
Rows: 131 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (4): year, total_waste_collected, collected_from_households, collected_f...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.management.2019.data <- read_csv("2019 data/2019 filtered/collection/waste_collected_by_municipality_of_origin_19.csv")
Rows: 134 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (4): year, total_waste_collected, municipal_waste_collected, waste_colle...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.management.2020.data <- read_csv("2020 data/2020 filtered/collection/management_of_collected_waste_20.csv")
Rows: 470 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): type_of_waste, name_of_municipality, statistical_region
dbl (2): year, municipal_waste_collected_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.management.2021.data <- read_csv("2021 data/2021 filtered/collection/waste_municipality_of_origin_21.csv")
Rows: 468 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_municipality, type_of_waste, statistical_region
dbl (2): year, waste_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.management.2022.data <- read_csv("2022 data/2022 filtered/collection/waste_municipality_of_origin_22.csv")
Rows: 494 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, name_of_municipality
dbl (2): year, waste_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.management.2023.data <- read_csv("2023 data/2023 filtered/collection/waste_municipality_of_origin_23.csv")
Rows: 604 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): statistical_region, type_of_waste, name_of_municipality, waste_coll...
dbl (2): year, waste_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Put the 2019 columns into a fixed order ...
waste.management.2019.data <- select(
  waste.management.2019.data,
  year,
  name_of_company,
  type_of_waste,
  total_waste_collected,
  municipal_waste_collected,
  waste_collected_from_activities
)

# ... and relabel the 2018 table with that header. This is positional, so it
# relies on the 2018 file listing its columns in the same order.
names(waste.management.2018.data) <- names(waste.management.2019.data)

# Clean the waste type labels of every year with the shared helper
waste.management.2018.data <- mutate(
  waste.management.2018.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.management.2019.data <- mutate(
  waste.management.2019.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.management.2020.data <- mutate(
  waste.management.2020.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.management.2021.data <- mutate(
  waste.management.2021.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.management.2022.data <- mutate(
  waste.management.2022.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.management.2023.data <- mutate(
  waste.management.2023.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

# Derive the municipality of origin for 2018.
# name_of_company is split at the first "; " into the company name and the
# rest (kept as the address). The municipality is whatever follows the first
# run of digits + whitespace up to the end of the address (presumably the
# postal code - TODO confirm), with that leading number stripped.
waste.management.2018.data <- waste.management.2018.data |>
  separate(
    name_of_company,
    into = c("company_name", "full_address"),
    sep = "; ",
    extra = "merge"
  ) |>
  mutate(
    address = full_address,
    name_of_municipality = str_replace(
      str_extract(full_address, "\\d+\\s(.+)$"),
      "^\\d+\\s",
      ""
    )
  ) |>
  select(-full_address)

# Attach the 2018 address and municipality to the 2019 rows, matching the 2019
# name_of_company against the 2018 company_name.
# The 2018 table can list a company on several rows, so the lookup is reduced
# to its distinct company / address / municipality combinations first.
# Without this, every 2019 row is multiplied by the number of 2018 rows of its
# company and dplyr reports an unexpected many-to-many relationship. A company
# with more than one distinct 2018 address still yields one row per address.
waste.management.2019.data <- waste.management.2019.data |>
  left_join(
    waste.management.2018.data |>
      distinct(company_name, address, name_of_municipality),
    by = c("name_of_company" = "company_name")
  )
Warning in left_join(waste.management.2019.data, select(waste.management.2018.data, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 3 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
# Drop rows that are exact duplicates
waste.management.2019.data <- distinct(waste.management.2019.data)

# Companies without a 2018 match have no municipality or address; mark both
# as "UNDEFINED" instead of leaving them missing
waste.management.2019.data <- waste.management.2019.data |>
  mutate(
    name_of_municipality = replace(
      name_of_municipality,
      is.na(name_of_municipality),
      "UNDEFINED"
    ),
    address = replace(address, is.na(address), "UNDEFINED")
  )

# Look up the statistical region of each municipality (2018)
waste.management.2018.data <- left_join(
  waste.management.2018.data,
  municipality_region_mapping,
  by = "name_of_municipality"
)


# Look up the statistical region of each municipality (2019)
waste.management.2019.data <- left_join(
  waste.management.2019.data,
  municipality_region_mapping,
  by = "name_of_municipality"
)


# Replace the four bilingual municipality names with their short form.
# Any other value, including NA, is returned unchanged.
shorten_bilingual_name <- function(x) {
  short_names <- c(
    "IZOLA - ISOLA" = "IZOLA",
    "KOPER - CAPODISTRIA" = "KOPER",
    "PIRAN - PIRANO" = "PIRAN",
    "LENDAVA - LENDVA" = "LENDAVA"
  )
  hit <- match(x, names(short_names))
  ifelse(is.na(hit), x, unname(short_names)[hit])
}

waste.management.2018.data <- waste.management.2018.data |>
  mutate(name_of_municipality = shorten_bilingual_name(name_of_municipality))

waste.management.2019.data <- waste.management.2019.data |>
  mutate(name_of_municipality = shorten_bilingual_name(name_of_municipality))

# Manual location fixes for two companies in the 2019 data:
#   - KOMUNALA RADGONA, javno podjetje d.o.o.
#   - JAVNO PODJETJE VODOVOD KANALIZACIJA SNAGA d.o.o.
# Address, municipality and statistical region are overwritten for their rows.
# The two flag columns only exist inside this step and are removed at the end.
waste.management.2019.data <- waste.management.2019.data |>
  mutate(
    is_radgona = name_of_company == "KOMUNALA RADGONA, javno podjetje d.o.o.",
    is_snaga = name_of_company == "JAVNO PODJETJE VODOVOD KANALIZACIJA SNAGA d.o.o.",
    address = ifelse(is_radgona, "PARTIZANSKA CESTA 13, 9250 GORNJA RADGONA", address),
    address = ifelse(is_snaga, "VODOVODNA CESTA 90, 1001 LJUBLJANA", address),
    name_of_municipality = ifelse(is_radgona, "GORNJA RADGONA", name_of_municipality),
    name_of_municipality = ifelse(is_snaga, "LJUBLJANA", name_of_municipality),
    statistical_region = ifelse(is_radgona, "POMURSKA", statistical_region),
    statistical_region = ifelse(is_snaga, "OSREDNJESLOVENSKA", statistical_region)
  ) |>
  select(-is_radgona, -is_snaga)

# Company name and address are not needed from here on
waste.management.2018.data <- select(
  waste.management.2018.data,
  !c(company_name, address)
)
waste.management.2019.data <- select(
  waste.management.2019.data,
  !c(name_of_company, address)
)

# From here on every year only carries total_waste_collected.
# 2018: one row per year / region / municipality / waste type
waste.management.2018.data <- waste.management.2018.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarise(total_waste_collected = sum(total_waste_collected, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# 2019: one row per year / region / municipality / waste type
waste.management.2019.data <- waste.management.2019.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarise(total_waste_collected = sum(total_waste_collected, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# 2020: same grouping; the amount comes from
# municipal_waste_collected_by_municipality
waste.management.2020.data <- waste.management.2020.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarise(
    total_waste_collected = sum(municipal_waste_collected_by_municipality, na.rm = TRUE)
  ) |>
  ungroup()
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# 2021: same grouping; the amount comes from waste_by_municipality
waste.management.2021.data <- waste.management.2021.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarise(total_waste_collected = sum(waste_by_municipality, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# 2022: same grouping; the amount comes from waste_by_municipality
waste.management.2022.data <- waste.management.2022.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarise(total_waste_collected = sum(waste_by_municipality, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# 2023: same grouping; the amount comes from waste_by_municipality
waste.management.2023.data <- waste.management.2023.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarise(total_waste_collected = sum(waste_by_municipality, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# Stack the per-year summaries (2018-2023) and clean the waste type labels
combined_municipal_data <- list(
  waste.management.2018.data,
  waste.management.2019.data,
  waste.management.2020.data,
  waste.management.2021.data,
  waste.management.2022.data,
  waste.management.2023.data
) |>
  bind_rows() |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# Save the combined municipal table into the visualization data folder
write_csv(
  combined_municipal_data,
  "Interface/Visualization/data/coll_municipal_combined.csv"
)

# Long form for plotting. total_waste_collected is the only measured column,
# so "source" holds that name on every row and the amounts are in
# total_collected.
df_long_municipal <- melt(
  combined_municipal_data,
  id.vars = c("year", "statistical_region", "name_of_municipality", "type_of_waste"),
  measure.vars = c("total_waste_collected"),
  variable.name = "source",
  value.name = "total_collected"
)

# First and last year actually present, so the title follows the data instead
# of a hard-coded range
t10_years <- range(df_long_municipal$year, na.rm = TRUE)

# Waste collected per municipality over time, one panel per statistical region.
# Each line is one municipality x waste type combination. Grouping by waste
# type alone would join points of different municipalities into a single
# zig-zag path within a panel.
t10 <- ggplot(
  df_long_municipal,
  aes(
    x = year, y = total_collected,
    color = name_of_municipality,
    group = interaction(name_of_municipality, type_of_waste)
  )
) +
  geom_line() +
  geom_point() +
  facet_wrap(~statistical_region, scales = "free_y") +
  labs(
    title = paste0(
      "Total Waste Collected by Municipality and Region (",
      t10_years[1], "-", t10_years[2], ")"
    ),
    x = "Year",
    y = "Total Waste Collected (in tons)",
    color = "Municipality"
  ) +
  my_theme

ggplotly(t10, width = 1000, height = 1000)
# Names of the five municipalities with the largest total over all years and
# waste types. slice_max() replaces the superseded top_n(); like top_n() it
# keeps ties, so more than five names are possible when totals are equal.
top_municipalities <- df_long_municipal |>
  group_by(name_of_municipality) |>
  summarise(total_waste = sum(total_collected, na.rm = TRUE)) |>
  slice_max(total_waste, n = 5) |>
  pull(name_of_municipality)

# Keep only the rows of those municipalities
df_top_municipal <- df_long_municipal |>
  filter(name_of_municipality %in% top_municipalities)

# First and last year actually present, so the title follows the data instead
# of a hard-coded range
t11_years <- range(df_top_municipal$year, na.rm = TRUE)

# One line per municipality, one panel per waste type
t11 <- ggplot(
  df_top_municipal,
  aes(
    x = year, y = total_collected,
    color = name_of_municipality, group = name_of_municipality
  )
) +
  geom_line() +
  geom_point() +
  labs(
    title = paste0(
      "Total Waste Collected by Top 5 Municipalities (",
      t11_years[1], "-", t11_years[2], ")"
    ),
    x = "Year",
    y = "Total Waste Collected (in tons)",
    color = "Municipality"
  ) +
  facet_wrap(~type_of_waste) +
  my_theme +
  theme(legend.position = "bottom")

ggplotly(t11, width = 1000, height = 1000)

Municipal Waste Collected by Municipality of Origin

This section of the analysis focuses on the municipal waste collected by the municipality of origin from 2018 to 2023. The steps involved in the code are summarized as follows:

  1. Data Loading:
    • Municipal waste collection data is read from CSV files for the years 2018 to 2023.
  2. Data Summarization:
    • For each year, the data is grouped by year, statistical_region, name_of_municipality, and type_of_waste.
    • The total waste collected is summarized using the sum() function.
  3. Column Selection and Renaming:
    • Relevant columns are selected and renamed to ensure consistency across different years.
  4. Data Cleaning:
    • The type_of_waste column is cleaned using a custom function (clear_waste_name).
  5. Data Combination:
    • Data from all years is combined into a single dataset using bind_rows().
  6. Data Reshaping:
    • The combined data is reshaped for plotting using the melt() function.
  7. Visualization:
    • A stacked bar plot is created to visualize the total waste collected by municipality of origin over the years.
    • The plot displays the amount of waste collected in tons, with different municipalities distinguished by color.

Code

waste.collected.2018.data <- read_csv("2018 data/2018 filtered/collection/municipal_waste_collected_separated_by_municipality_of_origin_18.csv")
Rows: 580 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): name_of_company, statistical_region, name_of_municipality, type_of_...
dbl (2): year, total_waste_collected

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.collected.2019.data <- read_csv("2019 data/2019 filtered/collection/municipal_waste_collected_by_municipality_of_origin_19.csv")
Rows: 413 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, total_waste_collected

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.collected.2020.data <- read_csv("2020 data/2020 filtered/collection/management_of_collected_waste_20.csv")
Rows: 470 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): type_of_waste, name_of_municipality, statistical_region
dbl (2): year, municipal_waste_collected_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waste.collected.2021.data <- read_csv("2021 data/2021 filtered/collection/waste_municipality_of_origin_21.csv")
Rows: 468 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_municipality, type_of_waste, statistical_region
dbl (2): year, waste_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2022 waste-by-municipality-of-origin table.
waste.collected.2022.data <- read_csv("2022 data/2022 filtered/collection/waste_municipality_of_origin_22.csv")
Rows: 494 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, name_of_municipality
dbl (2): year, waste_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2023 waste-by-municipality-of-origin table. It carries one extra
# character column compared with 2021/2022 and is aggregated further below.
waste.collected.2023.data <- read_csv("2023 data/2023 filtered/collection/waste_municipality_of_origin_23.csv")
Rows: 604 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): statistical_region, type_of_waste, name_of_municipality, waste_coll...
dbl (2): year, waste_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Collapse the 2018 rows to one total per year / statistical region /
# waste type / municipality, then drop the grouping.
waste.collected.2018.data <- ungroup(
  summarize(
    group_by(
      waste.collected.2018.data,
      year, statistical_region, type_of_waste, name_of_municipality
    ),
    waste_by_municipality = sum(total_waste_collected, na.rm = TRUE)
  )
)
`summarise()` has grouped output by 'year', 'statistical_region',
'type_of_waste'. You can override using the `.groups` argument.
# Same aggregation for 2019: one total per year / statistical region /
# waste type / municipality, returned ungrouped.
waste.collected.2019.data <- ungroup(
  summarize(
    group_by(
      waste.collected.2019.data,
      year, statistical_region, type_of_waste, name_of_municipality
    ),
    waste_by_municipality = sum(total_waste_collected, na.rm = TRUE)
  )
)
`summarise()` has grouped output by 'year', 'statistical_region',
'type_of_waste'. You can override using the `.groups` argument.
# 2020: keep the shared columns and rename the value column while selecting.
waste.collected.2020.data <- select(
  waste.collected.2020.data,
  year,
  statistical_region,
  type_of_waste,
  name_of_municipality,
  waste_by_municipality = municipal_waste_collected_by_municipality
)

# 2021: same columns, put into the shared order.
waste.collected.2021.data <- select(
  waste.collected.2021.data,
  year,
  statistical_region,
  type_of_waste,
  name_of_municipality,
  waste_by_municipality
)

# 2023: sum to one row per year / region / waste type / municipality.
waste.collected.2023.data <- ungroup(
  summarize(
    group_by(
      waste.collected.2023.data,
      year, statistical_region, type_of_waste, name_of_municipality
    ),
    waste_by_municipality = sum(waste_by_municipality, na.rm = TRUE)
  )
)
`summarise()` has grouped output by 'year', 'statistical_region',
'type_of_waste'. You can override using the `.groups` argument.
# Pass every year's type_of_waste column through clear_waste_name()
# (helper defined elsewhere in this file).
waste.collected.2018.data <- mutate(
  waste.collected.2018.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

waste.collected.2019.data <- mutate(
  waste.collected.2019.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

waste.collected.2020.data <- mutate(
  waste.collected.2020.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

waste.collected.2021.data <- mutate(
  waste.collected.2021.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

waste.collected.2022.data <- mutate(
  waste.collected.2022.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

waste.collected.2023.data <- mutate(
  waste.collected.2023.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

# Stack the yearly tables (2018-2023) into one data frame.
combined_collected_data <- bind_rows(
  waste.collected.2018.data,
  waste.collected.2019.data,
  waste.collected.2020.data,
  waste.collected.2021.data,
  waste.collected.2022.data,
  waste.collected.2023.data
)

# Write the combined table to the visualization data folder.
write_csv(combined_collected_data, "Interface/Visualization/data/coll_municipal_collected_combined.csv")

# Long format for plotting: the single measure column becomes
# source / total_collected.
df_long_collected <- combined_collected_data |>
  melt(
    id.vars = c("year", "statistical_region", "name_of_municipality", "type_of_waste"),
    measure.vars = c("waste_by_municipality"),
    variable.name = "source",
    value.name = "total_collected"
  )

# Dodged (side-by-side) bar plot of collected waste per year, filled by
# municipality; position = "dodge" means the bars are not stacked.
t12 <- ggplot(
  data = df_long_collected,
  mapping = aes(
    x = year,
    y = total_collected,
    fill = name_of_municipality,
    group = statistical_region
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Total Waste Collected by Municipality of Origin",
    x = "Year",
    y = "Total Waste Collected (in tons)",
    fill = "Municipality"
  ) +
  my_theme

# Interactive version.
ggplotly(t12, width = 1000, height = 1000)

Municipal Waste Management

This section analyzes municipal waste management from 2016 to 2023, focusing on various sources of waste management across different regions and waste types. The steps involved in the code are summarized below:

  1. Data Loading:
    • Municipal waste management data is loaded from CSV files for the years 2016 to 2023.
  2. Data Preparation:
    • Column names are standardized across different years to ensure consistency.
    • Missing columns for certain years (like waste_sent_to_non_EU) are added and initialized to zero.
  3. Data Combination:
    • Data from all years is combined into a single dataset using bind_rows().
  4. Data Filtering:
    • Rows with statistical_region labeled as “NEOPREDELJENO” are filtered out.
  5. Data Reshaping:
    • The combined data is reshaped for plotting using the melt() function, focusing on variables related to waste management.
  6. Visualizations:
    • Stacked Bar Plot: Visualizes the total waste given away by different sources for each statistical region using ggplot and ggplotly.
    • Bubble Plot: Shows the total waste given away by year, with bubble sizes representing the amount of waste.
    • Stacked Bar Plot by Year: Visualizes total waste by region and type across different years, with bars stacked by waste type.
    • Time Trend Line Plot: Illustrates waste management trends over time by statistical region.
    • Time Trend by Waste Type: Uses area plots to show waste composition over time by region.
    • Heatmap: Displays waste management intensity across regions and years.
    • Sparklines: Normalized line plots showing waste management trends by region in a compact format.

Code

# Load the 2016 waste-management table (7 columns; one fewer than 2022).
waste.management.2016.data <- read_csv("2016 data/2016 filtered/collection/management_of_waste_collected_16.csv")
Rows: 51 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_given_away, waste_given_to_processing_operator_RS...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2017 waste-management table (8 columns).
waste.management.2017.data <- read_csv("2017 data/2017 filtered/collection/management_of_waste_collected_17.csv")
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_given_to_collector_RS, waste_gi...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2018 waste-management table (7 columns; one fewer than 2022).
waste.management.2018.data <- read_csv("2018 data/2018 filtered/collection/waste_management_18.csv")
Rows: 52 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_given_away, delivered_to_collector, delivered_to_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2019 waste-handling table (7 columns; one fewer than 2022).
waste.management.2019.data <- read_csv("2019 data/2019 filtered/collection/waste_handling_19.csv")
Rows: 51 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_delivered, waste_delivered_to_other_collector_RS,...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2020 waste-management table.
# NOTE(review): the file is named home_composting_20.csv, although the parsed
# columns (total_waste_given_away, waste_handed_to_collectors_RS, ...) look
# like the management schema -- confirm this is the intended file.
waste.management.2020.data <- read_csv("2020 data/2020 filtered/collection/home_composting_20.csv")
Rows: 52 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2021 municipal waste-management table (8 columns).
waste.management.2021.data <- read_csv("2021 data/2021 filtered/collection/municipal_waste_management_21.csv")
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2022 municipal waste-management table. Its column names are used
# below as the template for the other years.
waste.management.2022.data <- read_csv("2022 data/2022 filtered/collection/municipal_waste_management_22.csv")
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2023 municipal waste-management table (8 columns; bound as-is below).
waste.management.2023.data <- read_csv("2023 data/2023 filtered/collection/municipal_waste_management_23.csv") 
Rows: 54 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give every year the 2022 column names so the tables can be stacked.
# NOTE(review): the names are assigned by POSITION, so this is only correct if
# each file's columns are in the same order (and have the same meaning) as the
# 2022 file -- verify against the raw CSV headers.

# Years that already have 8 columns: rename only.
waste.management.2017.data <- setNames(
  waste.management.2017.data,
  colnames(waste.management.2022.data)
)
waste.management.2020.data <- setNames(
  waste.management.2020.data,
  colnames(waste.management.2022.data)
)
waste.management.2021.data <- setNames(
  waste.management.2021.data,
  colnames(waste.management.2022.data)
)

# Years with 7 columns: append a zero-filled waste_sent_to_non_EU first,
# then rename.
waste.management.2016.data <- setNames(
  mutate(waste.management.2016.data, waste_sent_to_non_EU = 0),
  colnames(waste.management.2022.data)
)
waste.management.2018.data <- setNames(
  mutate(waste.management.2018.data, waste_sent_to_non_EU = 0),
  colnames(waste.management.2022.data)
)
waste.management.2019.data <- setNames(
  mutate(waste.management.2019.data, waste_sent_to_non_EU = 0),
  colnames(waste.management.2022.data)
)

# Stack 2016-2023 into one table.
combined_management_data <- bind_rows(
  waste.management.2016.data,
  waste.management.2017.data,
  waste.management.2018.data,
  waste.management.2019.data,
  waste.management.2020.data,
  waste.management.2021.data,
  waste.management.2022.data,
  waste.management.2023.data
)

# Normalise the waste-type labels and drop rows whose statistical_region is
# "NEOPREDELJENO", in one pass.
combined_management_data <- combined_management_data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste)) |>
  filter(statistical_region != "NEOPREDELJENO")

# Write the cleaned table to the visualization data folder.
write_csv(combined_management_data, "Interface/Visualization/data/coll_management_combined.csv")

# we only have 1 instance of waste_sent_to_non_EU 
# combined_management_data <- combined_management_data |> select(-waste_sent_to_non_EU)

# Long format for plotting: one row per region / waste type / year / measure
# column; the measure name goes to `source`, its value to
# `total_waste_given_away`.
df_long_management <- combined_management_data |>
  melt(
    id.vars = c("statistical_region", "type_of_waste", "year"),
    measure.vars = c(
      "waste_handed_to_collectors_RS",
      "waste_delivered_to_operators_RS",
      "waste_sent_to_EU",
      "waste_sent_to_non_EU"
    ),
    variable.name = "source",
    value.name = "total_waste_given_away"
  )

# Horizontal stacked bars: amount per statistical region, split by the
# `source` column of the long table.
t13 <- ggplot(
  data = df_long_management,
  mapping = aes(x = statistical_region, y = total_waste_given_away, fill = source)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Waste Management by Source",
    x = "Statistical Region",
    y = "Total Waste Given Away",
    fill = "Source"
  ) +
  my_theme

ggplotly(t13, width = 1000, height = 500)
# Bubble plot over the years: point size follows total_waste_given_away and
# colour follows the waste type.
t14 <- ggplot(
  data = combined_management_data,
  mapping = aes(
    x = year,
    y = total_waste_given_away,
    size = total_waste_given_away,
    color = type_of_waste,
    group = statistical_region
  )
) +
  geom_point() +
  labs(
    title = "Total Waste Given Away by Year",
    x = "Year",
    y = "Total Waste Given Away",
    color = "Type of Waste"
  ) +
  my_theme

ggplotly(t14, width = 1000, height = 500)
# Total amount per region / waste type / year, summed over all `source`
# categories. The result stays grouped (no explicit ungroup).
df_aggregated <- summarise(
  group_by(df_long_management, statistical_region, type_of_waste, year),
  total_amount = sum(total_waste_given_away, na.rm = TRUE)
)
`summarise()` has grouped output by 'statistical_region', 'type_of_waste'. You
can override using the `.groups` argument.
# Stacked bars per region, filled by waste type, one facet per year.
t15 <- ggplot(
  data = df_aggregated,
  mapping = aes(x = statistical_region, y = total_amount, fill = type_of_waste)
) +
  geom_bar(stat = "identity", position = "stack") +
  facet_wrap(~ year) +
  my_theme +
  labs(
    title = "Total Waste by Region and Type",
    x = "Statistical Region",
    y = "Total Amount",
    fill = "Type of Waste"
  )

ggplotly(t15, width = 1000, height = 1000)
# Total amount per region and year, summed over waste types and sources.
# The result stays grouped (no explicit ungroup).
df_time_trend <- summarise(
  group_by(df_long_management, statistical_region, year),
  total_amount = sum(total_waste_given_away, na.rm = TRUE)
)
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Line + point trend of the yearly total per statistical region.
# Every layer must be chained with `+`: without the `+` after my_theme the
# labs()/theme() calls form a separate expression (evaluating to NULL) and the
# title, axis labels and bottom legend are never applied to t16.
t16 <- ggplot(df_time_trend, aes(x = year, y = total_amount, color = statistical_region, group = statistical_region)) +
  geom_line() +
  geom_point() +
  my_theme +
  labs(title = "Waste Management Trend Over Time by Region",
       x = "Year",
       y = "Total Amount",
       color = "Statistical Region") +
  theme(legend.position = "bottom")
NULL
# Interactive version of the trend plot.
ggplotly(t16, width = 1000, height = 1000)

# Total amount per region / year / waste type. The result stays grouped
# (no explicit ungroup).
df_time_trend_by_type <- summarise(
  group_by(df_long_management, statistical_region, year, type_of_waste),
  total_amount = sum(total_waste_given_away, na.rm = TRUE)
)
`summarise()` has grouped output by 'statistical_region', 'year'. You can
override using the `.groups` argument.
# Area chart of waste composition over time, one facet per region with an
# independent y scale.
t17 <- ggplot(
  data = df_time_trend_by_type,
  mapping = aes(x = year, y = total_amount, fill = type_of_waste)
) +
  geom_area() +
  facet_wrap(~statistical_region, scales = "free_y") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Waste Composition Over Time by Region",
    x = "Year",
    y = "Total Amount",
    fill = "Type of Waste"
  ) +
  my_theme +
  theme(legend.position = "bottom")

ggplotly(t17, width = 1000, height = 1000)
# Heatmap: one tile per region and year, coloured by the total amount.
t18 <- ggplot(
  data = df_time_trend,
  mapping = aes(x = year, y = statistical_region, fill = total_amount)
) +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(
    title = "Waste Management Heatmap",
    x = "Year",
    y = "Statistical Region",
    fill = "Total Amount"
  ) +
  my_theme

ggplotly(t18, width = 1000, height = 1000)
# Min-max scale a numeric vector to [0, 1].
# A constant vector (max == min) would otherwise divide by zero and yield NaN;
# it is mapped to 0 instead so the region still gets a (flat) line.
normalize_to_unit <- function(x) {
  span <- max(x) - min(x)
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - min(x)) / span
}

# Per-region normalised totals, so every sparkline uses the same 0-1 range.
df_sparklines <- df_time_trend |>
  group_by(statistical_region) |>
  mutate(normalized_amount = normalize_to_unit(total_amount)) |>
  ungroup()

# One small line chart per region; y-axis text, axis titles and grid are
# removed to get a compact sparkline look.
t19 <- ggplot(df_sparklines, aes(x = year, y = normalized_amount, group = statistical_region)) +
  geom_line() +
  facet_wrap(~ statistical_region, ncol = 4) +
  my_theme +
  theme(axis.text.y = element_blank(),
        axis.title = element_blank(),
        panel.grid = element_blank()) +
  labs(title = "Waste Management Trends by Region (Normalized)")

ggplotly(t19, width = 1000, height = 1000)

Household composting - No relevant data

Treatment Data Analysis Process

Waste Storage

  1. Load Waste Storage Data
  • Read CSV files for waste storage data from 2016 to 2023.
  • Aggregate 2021 data by year, statistical_region, and type_of_waste.
  • For 2022 and 2023, set waste_stored_end_year to 0 (the code adds this column for both years).
  2. Combine Data
  • Combine all years of waste storage data into a single dataframe.
  • Factor the year column.
  3. Add Lagged Column
  • Add a lagged column to compare the end year with the next year’s start year using lead().
  4. Identify Matching Instances
  • Filter for instances where waste_stored_end_year matches the next year’s start year.
  • Create a summary table showing matches by year, region, and type of waste.
  5. Plot Waste Storage Data
  • Create a line plot using ggplot2 to visualize the combined waste storage data (2016 to 2023).
  • Mark matching instances with red points.
  • Convert the plot to an interactive plot using plotly.
  6. Complete Data for Analysis
  • Ensure every combination of statistical_region, type_of_waste, and year exists using complete().
  • Add a lagged column for the previous year’s end year and calculate the difference.
  7. Flags for “Outside Period”
  • Create flags indicating whether there are gaps in data.
  • Replace NAs with zeros for plotting purposes.
  8. Significant Change Analysis
  • Define a threshold for significant change (e.g., 10% increase or decrease).
  • Flag changes in waste storage amounts.
  9. Filter Data by Specific Waste Type
  • Filter the complete data for “Paper/Cardboard Packaging” in the “OSREDNJESLOVENSKA” region.
  10. Visualize Changes
  • Create a plot to visualize changes in waste amounts and mark significant changes with symbols.
  • Convert to an interactive plot using plotly.
  11. Prepare Data for Waterfall Chart
  • Create a dataset for a waterfall plot comparing waste amounts at year’s end and next year’s start.
  • Transform data for plotting.
  12. Create Waterfall Plot
  • Create a waterfall plot to visualize changes in waste amounts, using colors to indicate increases, decreases, and end-year amounts.
  • Convert the waterfall plot to an interactive plot using plotly.
  13. Variant Waterfall Chart
  • Prepare a variant dataset for a side-by-side bar comparison.
  • Create a variant waterfall plot for the same waste type and region.

Code

# Load the 2016 waste-storage table (start- and end-of-year stored amounts).
waste.storage.2016.data <- read_csv("2016 data/2016 filtered/treatment/waste_storage_16.csv")
Rows: 30 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2017 waste-storage table (start- and end-of-year stored amounts).
waste.storage.2017.data <- read_csv("2017 data/2017 filtered/treatment/waste_storage_17.csv")
Rows: 24 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2018 waste-storage table (start- and end-of-year stored amounts).
waste.storage.2018.data <- read_csv("2018 data/2018 filtered/treatment/waste_storage_18.csv")
Rows: 15 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2019 waste-storage table (start- and end-of-year stored amounts).
waste.storage.2019.data <- read_csv("2019 data/2019 filtered/treatment/waste_storage_19.csv")
Rows: 29 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2020 waste-storage table (start- and end-of-year stored amounts).
waste.storage.2020.data <- read_csv("2020 data/2020 filtered/treatment/waste_storage_20.csv")
Rows: 31 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2021 waste-storage table; it is aggregated per year / region /
# waste type further below.
waste.storage.2021.data <- read_csv("2021 data/2021 filtered/treatment/waste_storage_21.csv")
Rows: 31 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2022 table used as storage data. Unlike 2016-2021 it only has a
# start-of-year column (quantity_stored_start_year) and no end-of-year value.
waste.storage.2022.data <- read_csv("2022 data/2022 filtered/treatment/prev_treatment_22.csv")
Rows: 10 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, quantity_stored_start_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2023 table used as storage data. It has inlet/outlet waste types,
# a substance_or_object column and only a start-of-year quantity.
waste.storage.2023.data <- read_csv("2023 data/2023 filtered/treatment/prev_treatment_23.csv")
Rows: 55 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): statistical_region, type_of_waste_inlet, type_of_waste_outlet, subs...
dbl (2): year, stored_quantity_start_year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the 2023 column names (they differ from the earlier years).
names(waste.storage.2023.data)
[1] "year"                       "statistical_region"        
[3] "type_of_waste_inlet"        "type_of_waste_outlet"      
[5] "substance_or_object"        "stored_quantity_start_year"
# Print the first rows as a reproducible R expression, for inspection.
dput(head(waste.storage.2023.data))
structure(list(year = c(2023, 2023, 2023, 2023, 2023, 2023), 
    statistical_region = c("GORENJSKA", "GORENJSKA", "GORENJSKA", 
    "GORENJSKA", "GORENJSKA", "GORENJSKA"), type_of_waste_inlet = c("Bark and Cork Waste", 
    "Bulky Waste", "Paper/Cardboard Packaging", "Sawdust/Chips/Wood, not specified in 03 01 04", 
    "Sawdust/Chips/Wood, not specified in 03 01 04", "Sawdust/Chips/Wood, not specified in 03 01 04"
    ), type_of_waste_outlet = c("undefined", "undefined", "Paper/Cardboard Packaging", 
    "Wood, not specified in 19 12 06", "undefined", "undefined"
    ), substance_or_object = c("99 – drugo", "7 – les", "undefined", 
    "undefined", "7 – les", "99 – drugo"), stored_quantity_start_year = c(0, 
    23.423, 4.565, 8.781, 0, 0)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))
# Keep the raw 2023 table under its own name for the flow diagram.
flow_2023 <- waste.storage.2023.data

# A flow runs inlet waste type -> outlet waste type -> product.
# Each stage gets its own node set; the stage suffix keeps labels that occur
# in several stages (e.g. "undefined") distinct.
inlet_nodes <- paste(unique(flow_2023[["type_of_waste_inlet"]]), "(Inlet)")
outlet_nodes <- paste(unique(flow_2023[["type_of_waste_outlet"]]), "(Outlet)")
product_nodes <- paste(unique(flow_2023[["substance_or_object"]]), "(Product)")

print(outlet_nodes)
[1] "undefined (Outlet)"                                                                                          
[2] "Paper/Cardboard Packaging (Outlet)"                                                                          
[3] "Wood, not specified in 19 12 06 (Outlet)"                                                                    
[4] "Sawdust/Chips/Wood, not specified in 03 01 04 (Outlet)"                                                      
[5] "Wooden packaging (Outlet)"                                                                                   
[6] "Other Wastes (including mixtures of materials) from Mechanical Treatment, not specified in 19 12 11 (Outlet)"
[7] "Wood, not specified in 20 01 37 (Outlet)"                                                                    
# All node labels in stage order; link endpoints are looked up in this vector.
nodes <- c(inlet_nodes, outlet_nodes, product_nodes)

# Stage 1 links: inlet waste type -> outlet waste type, with the summed
# start-of-year stored quantity as the link value.
# na.rm = TRUE keeps a single missing quantity from turning the whole link
# value into NA (the same column is summed with na.rm = TRUE elsewhere in
# this file).
links_stage1 <- flow_2023 |>
  mutate(
    source = paste0(type_of_waste_inlet, " (Inlet)"),
    target = paste0(type_of_waste_outlet, " (Outlet)")
  ) |>
  group_by(source, target) |>
  summarise(value = sum(stored_quantity_start_year, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.
# Stage 2 links: outlet waste type -> product, with the summed start-of-year
# stored quantity as the link value.
# na.rm = TRUE keeps a single missing quantity from turning the whole link
# value into NA.
links_stage2 <- flow_2023 |>
  mutate(
    source = paste0(type_of_waste_outlet, " (Outlet)"),
    target = paste0(substance_or_object, " (Product)")
  ) |>
  group_by(source, target) |>
  summarise(value = sum(stored_quantity_start_year, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.
# Both link stages in one table.
all_links <- bind_rows(links_stage1, links_stage2)

# Sankey diagram. plotly expects 0-based node indices, hence match(...) - 1.
# A link is drawn red when its source label (minus " (Inlet)") equals its
# target label (minus " (Outlet)"), and blue otherwise.
fig <- plot_ly(
  type = "sankey",
  orientation = "h",
  node = list(
    label = nodes,
    pad = 15,
    thickness = 20,
    line = list(color = "black", width = 0.5)
  ),
  link = list(
    source = match(all_links$source, nodes) - 1,
    target = match(all_links$target, nodes) - 1,
    value = all_links$value,
    color = ifelse(
      gsub(" \\(Inlet\\)", "", all_links$source) == gsub(" \\(Outlet\\)", "", all_links$target),
      "rgba(255,0,0,0.5)",
      "rgba(0,150,200,0.5)"
    )
  ),
  height = 800,
  width = 1200
) |>
  layout(
    title = "Waste Flow Diagram: Inlet → Outlet → Product",
    font = list(size = 10)
  )

# Show the diagram.
fig
# Collapse 2021 to one row per year / region / waste type, summing the
# start- and end-of-year stored amounts.
waste.storage.2021.data <- ungroup(
  summarize(
    group_by(waste.storage.2021.data, year, statistical_region, type_of_waste),
    waste_stored_start_year = sum(waste_stored_start_year, na.rm = TRUE),
    waste_stored_end_year = sum(waste_stored_end_year, na.rm = TRUE)
  )
)
`summarise()` has grouped output by 'year', 'statistical_region'. You can
override using the `.groups` argument.
# Bring the 2022 table onto the shared storage schema.
# The start-of-year column is renamed BY NAME rather than by overwriting all
# names positionally, so the result does not depend on the CSV column order.
# 2022 has no end-of-year value; it is set to 0 as a placeholder.
waste.storage.2022.data <- waste.storage.2022.data |>
  rename(waste_stored_start_year = quantity_stored_start_year) |>
  mutate(waste_stored_end_year = 0) |>
  select(
    year,
    statistical_region,
    type_of_waste,
    waste_stored_start_year,
    waste_stored_end_year
  )

# Sum the 2023 start-of-year quantity per year / region / inlet waste type.
filtered_2023 <- ungroup(
  summarize(
    group_by(waste.storage.2023.data, year, statistical_region, type_of_waste_inlet),
    waste_stored_start_year = sum(stored_quantity_start_year, na.rm = TRUE)
  )
)
`summarise()` has grouped output by 'year', 'statistical_region'. You can
override using the `.groups` argument.
# Finish the 2023 table in one pipeline:
#  - add a zero end-of-year column (2023 has no end-of-year value),
#  - keep only inlet types mentioning wood, bark, sawdust, cork or paper
#    (case-insensitive),
#  - rename the inlet column to the shared name type_of_waste.
filtered_2023 <- filtered_2023 |>
  mutate(waste_stored_end_year = 0) |>
  filter(str_detect(type_of_waste_inlet, "(?i)wood|bark|sawdust|cork|paper")) |>
  rename(type_of_waste = type_of_waste_inlet)

# From here on the 2023 storage table is the filtered, harmonised version.
waste.storage.2023.data <- filtered_2023


# Stack the yearly storage tables (2016-2023).
combined_storage_data <- bind_rows(waste.storage.2016.data, waste.storage.2017.data, waste.storage.2018.data, waste.storage.2019.data, waste.storage.2020.data, waste.storage.2021.data, waste.storage.2022.data, waste.storage.2023.data)

# clear waste name
combined_storage_data <- combined_storage_data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# Factor the year column
combined_storage_data$year <- factor(combined_storage_data$year)

# Add next year's start-of-year amount next to each row, per region and
# waste type. lead() alone returns the next AVAILABLE row, which is the wrong
# year whenever a region/type has a gap; the value is therefore kept only when
# the following row really is year + 1, and is NA otherwise.
combined_storage_data <- combined_storage_data |>
    arrange(statistical_region, type_of_waste, year) |>
    group_by(statistical_region, type_of_waste) |>
    mutate(
        next_year_start = if_else(
            lead(as.integer(as.character(year))) ==
                as.integer(as.character(year)) + 1L,
            lead(waste_stored_start_year, 1),
            NA_real_
        )
    ) |>
    ungroup()

# export
write_csv(combined_storage_data, "Interface/Visualization/data/trt_storage_combined.csv")

# Identify matching instances: rows whose end-of-year amount equals the
# following row's start-of-year amount.
# near() compares with a small tolerance instead of exact `==`, because some
# amounts are floating-point sums and may differ in the last bits.
# Rows with a missing next_year_start are dropped, as before.
# NOTE(review): end-of-year values for 2022 and 2023 are placeholder zeros, so
# a match on those years may be an artefact -- confirm before interpreting.
matching_instances <- combined_storage_data |>
    filter(near(waste_stored_end_year, next_year_start)) |>
    select(
        year,
        statistical_region,
        type_of_waste,
        waste_stored_start_year,
        waste_stored_end_year,
        next_year_start
    )

# Number of matches per year / region / waste type, sorted for display.
summary_table <- matching_instances |>
    group_by(year, statistical_region, type_of_waste) |>
    summarise(matches = n()) |>
    arrange(year, statistical_region, type_of_waste)
`summarise()` has grouped output by 'year', 'statistical_region'. You can
override using the `.groups` argument.
# Show the per-year / region / waste-type match counts.
print(summary_table)
# A tibble: 89 × 4
# Groups:   year, statistical_region [48]
   year  statistical_region   type_of_waste             matches
   <fct> <chr>                <chr>                       <int>
 1 2016  OBALNO-KRAŠKA        Other Wood Waste                1
 2 2016  OBALNO-KRAŠKA        Paper/Cardboard Packaging       1
 3 2016  OSREDNJESLOVENSKA    Paper/Cardboard Packaging       1
 4 2016  OSREDNJESLOVENSKA    Wood                            1
 5 2016  PODRAVSKA            Paper/Cardboard Packaging       1
 6 2016  PODRAVSKA            Wooden Packaging                1
 7 2016  POSAVSKA             Paper/Cardboard Packaging       1
 8 2016  POSAVSKA             Wooden Packaging                1
 9 2016  PRIMORSKO-NOTRANJSKA Paper/Cardboard Packaging       1
10 2016  PRIMORSKO-NOTRANJSKA Wooden Packaging                1
# ℹ 79 more rows
# Start- and end-of-year stored waste over time, one facet per region and
# waste type; red points mark rows whose end-of-year amount matches the next
# start-of-year amount.
# The title states the years actually combined above (2016-2023); it
# previously read "2018-2022".
t20 <- ggplot(combined_storage_data, aes(
    x = year,
    group = interaction(statistical_region, type_of_waste)
)) +
    geom_line(aes(y = waste_stored_start_year, color = "Start Year")) +
    geom_line(aes(y = waste_stored_end_year, color = "End Year")) +
    geom_point(
        data = matching_instances,
        aes(y = waste_stored_end_year),
        color = "red",
        size = 3
    ) +
    labs(
        title = "Waste Storage Data (2016-2023)",
        subtitle = "Red points indicate matching instances where End Year matches Start Year of the next year",
        x = "Year",
        y = "Waste Stored",
        color = "Legend"
    ) +
    facet_wrap( ~ statistical_region + type_of_waste, scales = "free_y") +
    my_theme

ggplotly(t20, width = 1000, height = 1000)
# Build the analysis table in one pipeline:
#  1. complete(): make sure every year exists for every observed
#     region / waste-type pair, filling the stored amounts with 0;
#  2. per region / waste type, attach the previous row's end-of-year amount;
#  3. flag rows whose next-start / previous-end value is missing
#     ("outside period"), then replace those NAs with 0 for plotting;
#  4. difference = this year's start minus the previous year's end.
# NOTE(review): next_year_start was computed before complete() and is not
# recomputed here, so it does not account for the rows complete() adds --
# confirm this is intended.
complete_data <- combined_storage_data |>
    complete(
        year,
        nesting(statistical_region, type_of_waste),
        fill = list(waste_stored_start_year = 0, waste_stored_end_year = 0)
    ) |>
    arrange(statistical_region, type_of_waste, year) |>
    group_by(statistical_region, type_of_waste) |>
    mutate(previous_end_year = lag(waste_stored_end_year, 1)) |>
    ungroup() |>
    mutate(
        # Flags first: they must see the NAs before they are replaced.
        outside_period_next_start = is.na(next_year_start),
        outside_period_prev_end = is.na(previous_end_year),
        next_year_start = coalesce(next_year_start, 0),
        previous_end_year = coalesce(previous_end_year, 0),
        difference = waste_stored_start_year - previous_end_year
    )


# Relative threshold for a "significant" change: 0.1 = 10 % of the previous
# year's end-of-year amount.
threshold <- 0.1

# Label each row by comparing `difference` with +/- threshold times the
# previous end-of-year amount. Any plot that maps these labels through a
# manual scale must use exactly these strings as keys.
complete_data <- mutate(
    complete_data,
    significant_change = case_when(
        difference > (previous_end_year * threshold) ~ "Increase",
        difference < -(previous_end_year * threshold) ~ "Decrease",
        TRUE ~ "No Significant Change"
    )
)

# Subset used for the detailed plot: one waste type in one region.
filter_by_waste <- filter(
    complete_data,
    type_of_waste == "Paper/Cardboard Packaging",
    statistical_region == "OSREDNJESLOVENSKA"
)

# Unicode symbols for up / down / equal.
arrow_up <- "\u2191"   # Up arrow
arrow_down <- "\u2193" # Down arrow
equal <- "\u003d"      # Equal sign

# Start-of-year vs. end-of-year stored waste for the filtered series.
# Start-of-year points are shaped and coloured by `significant_change`.
t21 <- ggplot(filter_by_waste, aes(x = year, group = statistical_region)) +
  geom_line(aes(y = waste_stored_start_year, color = "Start Year", linetype = "Start Year")) +
  geom_line(aes(y = waste_stored_end_year, color = "End Year", linetype = "End Year")) +
  # `text` is not a ggplot2 aesthetic (ggplot2 warns about it); it only carries
  # the tooltip content for the plotly conversion.
  # `size` is a constant, so it is set outside aes(): inside aes() it would be
  # treated as a mapped variable and rescaled instead of drawn at size 5.
  geom_point(
    aes(
      y = waste_stored_start_year,
      shape = significant_change,
      color = significant_change,
      text = paste("Year:", year,
                   "<br>Waste at start:", waste_stored_start_year,
                   "<br>Change:", significant_change)
    ),
    size = 5
  ) +
  geom_point(
    aes(
      y = waste_stored_end_year,
      shape = "End Year",
      color = "End Year",
      text = paste("Year:", year,
                   "<br>Waste at end:", waste_stored_end_year)
    ),
    size = 5
  ) +
  # Scale keys must equal the values stored in `significant_change`
  # ("Increase", "Decrease", "No Significant Change"); a key that does not match
  # leaves that category without a shape/colour.
  scale_shape_manual(values = c("Increase" = 24, "Decrease" = 25,
                                "No Significant Change" = 21, "End Year" = 21),
                     name = "Change in Waste") +
  scale_color_manual(values = c("Start Year" = "blue", "End Year" = "red",
                                "Increase" = "#19a90b", "Decrease" = "#a90b0b",
                                "No Significant Change" = "grey"),
                     name = "Data Type") +
  scale_linetype_manual(values = c("Start Year" = "solid", "End Year" = "solid"),
                        name = "Year Type") +
  labs(x = "Year",
       y = "Waste Stored (in tons)") +
  facet_wrap(~statistical_region, scales = "free_y") +
  theme_minimal() +
  theme(legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(t = 10, r = 10, b = 10, l = 10),
        legend.spacing.y = unit(0.5, "cm"))
Warning in geom_point(aes(y = waste_stored_start_year, shape =
significant_change, : Ignoring unknown aesthetics: text
Warning in geom_point(aes(y = waste_stored_end_year, shape = "End Year", :
Ignoring unknown aesthetics: text
# Interactive version of t21: tooltips show the `text` aesthetic, the legend
# is placed right of the panel, and the title carries a subtitle line
t22 <- ggplotly(t21, width = 1000, height = 1000, tooltip = "text") |>
  layout(
    legend = list(x = 1.02, y = 0.5),
    title = list(
      text = paste0(
        "Paper/Cardboard Packaging Waste Storage Data",
        "<br>",
        "<sup>",
        "Triangles indicate significant increases or decreases in waste compared to previous year end",
        "</sup>"
      ),
      x = 0.01
    ),
    margin = list(l = 50, r = 50, b = 100, t = 80, pad = 4)
  )

t22
# Waterfall input: every year yields a "<year> End" row (closing stock) and a
# "<year + 1> Start" row (gap between next year's opening stock and this
# year's closing stock). Rows without a following year are dropped.
# Assumes `year` is a factor whose levels are numeric strings.
waterfall_data <- filter_by_waste |>
  arrange(year) |>
  mutate(
    year_value = as.numeric(levels(year))[year],
    end_year = paste0(year_value, " End"),
    start_next_year = paste0(year_value + 1, " Start"),
    end_amount = waste_stored_end_year,
    start_amount = lead(waste_stored_start_year),
    difference = start_amount - end_amount
  ) |>
  select(end_year, start_next_year, end_amount, start_amount, difference) |>
  tidyr::pivot_longer(
    cols = c(end_year, start_next_year),
    names_to = "type",
    values_to = "year"
  ) |>
  mutate(
    amount = ifelse(type == "end_year", end_amount, difference),
    cumulative = cumsum(amount),
    color_category = case_when(
      type == "end_year" ~ "End Year",
      difference > 0 ~ "Increase",
      difference < 0 ~ "Decrease",
      TRUE ~ "No Change"
    )
  ) |>
  filter(!is.na(difference)) |>
  mutate(
    # Labels get a trailing space, then become a factor ordered by first
    # appearance so the x axis follows row order
    year = paste0(year, " "),
    year = factor(year, levels = unique(year))
  )

# Waterfall chart of year-end stock and the change to the next year's start.
# A bar whose `amount` is exactly 0 is drawn at `start_amount` instead.
# Columns are referenced by bare name inside aes(); `waterfall_data$amount`
# bypasses ggplot2's data masking and raises a "discouraged" warning.
waterfall_plot <- ggplot(
  waterfall_data,
  aes(
    x = year,
    y = ifelse(amount == 0, start_amount, amount),
    fill = color_category
  )
) +
  # `text` only feeds the plotly tooltip; ggplot2 itself ignores it
  geom_col(color = "black",
      aes(text = paste0(
    "Year: ", year, "<br>",
    "Amount: ", round(amount, 2), " tons<br>",
    "Cumulative: ", round(cumulative, 2), " tons"
  ))) +
  # Value labels sit 5% of the largest amount above `amount`
  geom_text(aes(label = round(amount, 1),
                y = amount + 0.05 * max(amount)),
            vjust = 0, size = 3) +
  scale_fill_manual(values = c("Increase" = "#006400", "Decrease" = "#8B0000",
                               "No Change" = "#808080", "End Year" = "#4169E1"),
                    name = "Change Type") +
  labs(x = "Year",
       y = "Waste Amount (tons)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning in geom_col(color = "black", aes(text = paste0("Year: ", year, "<br>",
: Ignoring unknown aesthetics: text
# Interactive waterfall chart; tooltips show the `text` aesthetic
t23 <- ggplotly(waterfall_plot, tooltip = "text", width = 1000, height = 1000)
Warning: Use of `waterfall_data$amount` is discouraged.
ℹ Use `amount` instead.
# Title with subtitle line, plus extra top margin to make room for it
t23 <- layout(
  t23,
  title = list(
    text = paste0(
      "Comparison of Waste Stored at Year's End and Next Year's Start",
      "<br>",
      "<sup>",
      "Paper/Cardboard Packaging in OSREDNJESLOVENSKA Region",
      "</sup>"
    ),
    x = 0.01
  ),
  margin = list(t = 100)
)

t23
# Variant input: "<year> End" rows carry the closing stock and
# "<year + 1> Start" rows carry the next year's opening stock; `difference`
# is kept on the "Start" rows only and set to 0 on the "End" rows.
# Assumes `year` is a factor whose levels are numeric strings.
variant_data <- filter_by_waste |>
  arrange(year) |>
  mutate(
    year_value = as.numeric(levels(year))[year],
    end_year = paste0(year_value, " End"),
    start_next_year = paste0(year_value + 1, " Start"),
    end_amount = waste_stored_end_year,
    start_amount = lead(waste_stored_start_year),
    difference = start_amount - end_amount
  ) |>
  select(end_year, start_next_year, end_amount, start_amount, difference) |>
  tidyr::pivot_longer(
    cols = c(end_year, start_next_year),
    names_to = "type",
    values_to = "year"
  ) |>
  mutate(
    amount = ifelse(type == "end_year", end_amount, start_amount),
    difference = ifelse(type == "start_next_year", difference, 0),
    cumulative = cumsum(amount),
    color_category = case_when(
      type == "end_year" ~ "End Year",
      difference > 0 ~ "Increase",
      difference < 0 ~ "Decrease",
      TRUE ~ "No Change"
    )
  ) |>
  # Drop the rows of the last year, which has no following start value
  filter(!is.na(start_amount)) |>
  mutate(
    # Labels get a trailing space, then become a factor ordered by first
    # appearance so the x axis follows row order
    year = paste0(year, " "),
    year = factor(year, levels = unique(year))
  )
# Level order of the bar categories; rows are sorted by it below
desired_order <- c("Start Amount", "Increase", "End Year", "Decrease", "No Change")

# One row per bar: stack `amount` and `difference` into `value`, label each
# row with its bar category, then sort the rows by category
variant_data_long <- variant_data |>
  tidyr::pivot_longer(
    cols = c(amount, difference),
    names_to = "bar_type",
    values_to = "value"
  ) |>
  mutate(
    bar_category = case_when(
      bar_type == "amount" & type == "end_year" ~ "End Year",
      bar_type == "amount" & type != "end_year" ~ "Start Amount",
      bar_type == "difference" & color_category == "Increase" ~ "Increase",
      bar_type == "difference" & color_category == "Decrease" ~ "Decrease",
      TRUE ~ "No Change"
    ),
    bar_category = factor(bar_category, levels = desired_order)
  ) |>
  arrange(bar_category)

# Variant chart: amounts and differences plotted at the same x positions.
# NOTE(review): the bars use position_identity() (overlaid, not side by side)
# while the labels use position_dodge(width = 0.9) - confirm the mix is intended.
variant_plot <- ggplot(
  variant_data_long,
  aes(x = year, y = value, fill = bar_category)
) +
  # `text` only feeds the plotly tooltip; ggplot2 itself ignores it
  geom_col(
    aes(text = paste0(
      "Year: ", year, "<br>",
      "Type: ", bar_type, "<br>",
      "Value: ", round(value, 2), " tons<br>"
    )),
    position = position_identity(),
    color = "black"
  ) +
  # Zero values get no label; labels sit 5% of the largest value above `value`
  geom_text(
    aes(
      label = ifelse(value == 0, NA, round(value, 1)),
      y = value + 0.05 * max(value)
    ),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(
    values = c(
      "End Year" = "#4169E1",
      "Increase" = "#006400",     # increase from last year
      "Start Amount" = "#808080",
      "Decrease" = "#8B0000",     # decrease from last year
      "No Change" = "#D3D3D3"
    ),
    name = "Type"
  ) +
  labs(x = "Year", y = "Waste Amount (tons)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning in geom_col(position = position_identity(), color = "black", aes(text =
paste0("Year: ", : Ignoring unknown aesthetics: text
# Interactive variant chart with a title, a subtitle line and extra top margin
t24 <- ggplotly(variant_plot, width = 1000, height = 800, tooltip = "text") |>
  layout(
    title = list(
      text = paste0(
        "Comparison of Waste Stored at Year's End and Next Year's Start (Variant)",
        "<br>",
        "<sup>",
        "Paper/Cardboard Packaging in OSREDNJESLOVENSKA Region",
        "</sup>"
      ),
      x = 0.01
    ),
    margin = list(t = 100)
  )

# Show both charts
t23
t24

Waste Collected

# Load the 2016 waste-collected table from the filtered treatment data
waste.collected.2016.data <- read_csv("2016 data/2016 filtered/treatment/waste_collected_16.csv")
Rows: 56 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_collected, waste_collected_from_collector_RS, was...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2017 waste-collected table from the filtered treatment data
waste.collected.2017.data <- read_csv("2017 data/2017 filtered/treatment/waste_collected_17.csv")
Rows: 53 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, waste_received_own_waste, waste_receive...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2018 waste-collected table from the filtered treatment data
waste.collected.2018.data <- read_csv("2018 data/2018 filtered/treatment/waste_collected_18.csv")
Rows: 51 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, received_from_own_waste_OVD, received_f...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2019 waste-collected table from the filtered treatment data
waste.collected.2019.data <- read_csv("2019 data/2019 filtered/treatment/waste_collected_19.csv")
Rows: 51 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (7): year, total_waste_received, waste_received_from_producer, waste_rec...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2020 table (source file is named waste_received_20.csv)
waste.collected.2020.data <- read_csv("2020 data/2020 filtered/treatment/waste_received_20.csv")
Rows: 51 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, waste_received_own_waste, waste_receive...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2021 table (source file is named waste_received_21.csv)
waste.collected.2021.data <- read_csv("2021 data/2021 filtered/treatment/waste_received_21.csv")
Rows: 52 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, waste_received_own_waste, waste_receive...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2022 table (source file is named waste_received_22.csv)
waste.collected.2022.data <- read_csv("2022 data/2022 filtered/treatment/waste_received_22.csv")
Rows: 54 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_received, untreated_waste_from_storage_start_year...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2019: set the own-waste column to 0 (the 2019 file appears to lack it), then
# fix the column order so that it lines up with the other years
waste.collected.2019.data <- waste.collected.2019.data |>
  mutate(waste_received_own_waste = 0) |>
  select(
    year,
    statistical_region,
    type_of_waste,
    total_waste_received,
    waste_received_own_waste,
    waste_received_from_producer,
    waste_received_from_collector,
    waste_received_from_processing_operator,
    waste_received_from_other_EU_country,
    waste_received_from_other_country_outside_EU
  )

# Harmonise headers: the 2016-2020 column names are overwritten, position by
# position, with the 2021 names.
# NOTE(review): this relies on every file listing its columns in the same
# order as the 2021 file - confirm for 2016-2018 and 2020.
names(waste.collected.2016.data) <- names(waste.collected.2021.data)
names(waste.collected.2017.data) <- names(waste.collected.2021.data)
names(waste.collected.2018.data) <- names(waste.collected.2021.data)
names(waste.collected.2019.data) <- names(waste.collected.2021.data)
names(waste.collected.2020.data) <- names(waste.collected.2021.data)

#' Add a zero-filled `untreated_waste_from_storage_start_year` column
#'
#' The value is the constant 0 for every row, so no grouping is needed; the
#' input no longer has to contain `statistical_region` and `type_of_waste`.
#'
#' @param data A data frame (any existing grouping is removed).
#' @return `data`, ungrouped, with `untreated_waste_from_storage_start_year`
#'   set to 0 in every row (added as the last column if it did not exist).
add_untreated_waste <- function(data) {
  data |>
    ungroup() |>
    mutate(untreated_waste_from_storage_start_year = 0)
}

# Give the 2016-2021 tables the zero-filled untreated-waste column
waste.collected.2016.data <- waste.collected.2016.data |> add_untreated_waste()
waste.collected.2017.data <- waste.collected.2017.data |> add_untreated_waste()
waste.collected.2018.data <- waste.collected.2018.data |> add_untreated_waste()
waste.collected.2019.data <- waste.collected.2019.data |> add_untreated_waste()
waste.collected.2020.data <- waste.collected.2020.data |> add_untreated_waste()
waste.collected.2021.data <- waste.collected.2021.data |> add_untreated_waste()

# Inspect the resulting column layout
colnames(waste.collected.2021.data)
 [1] "year"                                   
 [2] "statistical_region"                     
 [3] "type_of_waste"                          
 [4] "total_waste_received"                   
 [5] "waste_received_own_waste"               
 [6] "waste_received_from_producers_RS"       
 [7] "waste_received_from_collectors_RS"      
 [8] "waste_received_from_processors_RS"      
 [9] "waste_received_from_EU"                 
[10] "waste_received_from_non_EU"             
[11] "untreated_waste_from_storage_start_year"
#' Keep the eleven harmonised columns in a fixed order
#'
#' @param data A data frame containing all of the columns listed below.
#' @return `data` reduced to those columns, with
#'   `untreated_waste_from_storage_start_year` placed right after
#'   `total_waste_received`.
select_columns <- function(data) {
  column_order <- c(
    "year",
    "statistical_region",
    "type_of_waste",
    "total_waste_received",
    "untreated_waste_from_storage_start_year",
    "waste_received_own_waste",
    "waste_received_from_producers_RS",
    "waste_received_from_collectors_RS",
    "waste_received_from_processors_RS",
    "waste_received_from_EU",
    "waste_received_from_non_EU"
  )
  select(data, all_of(column_order))
}

# Bring the 2016-2021 tables into the shared column order
waste.collected.2016.data <- waste.collected.2016.data |> select_columns()
waste.collected.2017.data <- waste.collected.2017.data |> select_columns()
waste.collected.2018.data <- waste.collected.2018.data |> select_columns()
waste.collected.2019.data <- waste.collected.2019.data |> select_columns()
waste.collected.2020.data <- waste.collected.2020.data |> select_columns()
waste.collected.2021.data <- waste.collected.2021.data |> select_columns()

# The 2022 table is renamed by position to the (reordered) 2021 header
names(waste.collected.2022.data) <- names(waste.collected.2021.data)

# Stack all years into one table
combined_collected_data <- bind_rows(
  waste.collected.2016.data,
  waste.collected.2017.data,
  waste.collected.2018.data,
  waste.collected.2019.data,
  waste.collected.2020.data,
  waste.collected.2021.data,
  waste.collected.2022.data
)

# Normalise the waste-type labels with clear_waste_name()
combined_collected_data <- mutate(
  combined_collected_data,
  type_of_waste = clear_waste_name(type_of_waste)
)

# Export for the visualization interface
write_csv(combined_collected_data, "Interface/Visualization/data/trt_collected_combined.csv")

# Total waste received per year (missing values are ignored in the sum)
yearly_total <- combined_collected_data |>
  group_by(year) |>
  summarise(total_waste = sum(total_waste_received, na.rm = TRUE))

# Line chart with markers, one point per year
plot_yearly <- yearly_total |>
  plot_ly(x = ~year, y = ~total_waste, type = "scatter", mode = "lines+markers") |>
  layout(
    title = "Total Waste Received Over Years",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Total Waste Received")
  )

plot_yearly
# Total waste received per statistical region, largest first.
# Rows without a region are labelled "NEOPREDELJENO" so their total is shown
# as a bar of its own; otherwise the NA category is dropped by plotly
# ("Ignoring 1 observations").
region_total <- combined_collected_data |>
  mutate(statistical_region = coalesce(statistical_region, "NEOPREDELJENO")) |>
  group_by(statistical_region) |>
  summarize(total_waste = sum(total_waste_received, na.rm = TRUE)) |>
  arrange(desc(total_waste))

# Create the plotly bar chart
plot_region <- plot_ly(region_total, x = ~statistical_region, y = ~total_waste, type = 'bar') |>
  layout(title = "Total Waste Received by Statistical Region",
         xaxis = list(title = "Statistical Region"),
         yaxis = list(title = "Total Waste Received"))

# Display the plot
plot_region
Warning: Ignoring 1 observations
# Total waste received per waste type, largest first
waste_type_total <- combined_collected_data |>
  group_by(type_of_waste) |>
  summarise(total_waste = sum(total_waste_received, na.rm = TRUE)) |>
  arrange(desc(total_waste))

# Bar chart, one bar per waste type
plot_waste_type <- waste_type_total |>
  plot_ly(x = ~type_of_waste, y = ~total_waste, type = "bar") |>
  layout(
    title = "Total Waste Received by Type of Waste",
    xaxis = list(title = "Type of Waste"),
    yaxis = list(title = "Total Waste Received")
  )

plot_waste_type

Treatment of Waste

# Load the 2016 treatment-of-waste table from the filtered treatment data
waste.treatment.2016.data <- read_csv("2016 data/2016 filtered/treatment/treatment_of_waste_16.csv")
Rows: 54 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_given_away

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2017 treatment-of-waste table from the filtered treatment data
waste.treatment.2017.data <- read_csv("2017 data/2017 filtered/treatment/treatment_of_waste_17.csv")
Rows: 52 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_given_away

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2018 treatment-of-waste table from the filtered treatment data
waste.treatment.2018.data <- read_csv("2018 data/2018 filtered/treatment/treatment_of_waste_18.csv")
Rows: 51 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_given_away

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2019 table (source file is named treatment_of_waste_received_19.csv)
waste.treatment.2019.data <- read_csv("2019 data/2019 filtered/treatment/treatment_of_waste_received_19.csv")
Rows: 51 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_for_processing

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2020 table (source file is named treatment_of_waste_received_20.csv)
waste.treatment.2020.data <- read_csv("2020 data/2020 filtered/treatment/treatment_of_waste_received_20.csv")
Rows: 51 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, waste_entering_treatment_process

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2021 table; its column names are later used as the common header
waste.treatment.2021.data <- read_csv("2021 data/2021 filtered/treatment/treatment_of_waste_received_21.csv")
Rows: 53 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, waste_entering_treatment_process

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2022 table (source file is named waste_treatment_22.csv)
waste.treatment.2022.data <- read_csv("2022 data/2022 filtered/treatment/waste_treatment_22.csv")
Rows: 54 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, waste_quantity_per_treatment

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Harmonise headers: rename by position to the 2021 column names (the 2020
# file is not renamed here).
# NOTE(review): the quantity column has a different source name in most years
# (total_waste_given_away, total_waste_for_processing,
# waste_quantity_per_treatment); after renaming they are all treated as
# waste_entering_treatment_process - confirm they measure the same thing.
names(waste.treatment.2016.data) <- names(waste.treatment.2021.data)
names(waste.treatment.2017.data) <- names(waste.treatment.2021.data)
names(waste.treatment.2018.data) <- names(waste.treatment.2021.data)
names(waste.treatment.2019.data) <- names(waste.treatment.2021.data)
names(waste.treatment.2022.data) <- names(waste.treatment.2021.data)

# Stack all years, label rows without a region as "NEOPREDELJENO" and
# normalise the waste-type labels with clear_waste_name()
combined_treatment_data <- bind_rows(
  waste.treatment.2016.data,
  waste.treatment.2017.data,
  waste.treatment.2018.data,
  waste.treatment.2019.data,
  waste.treatment.2020.data,
  waste.treatment.2021.data,
  waste.treatment.2022.data
) |>
  mutate(
    statistical_region = ifelse(is.na(statistical_region), "NEOPREDELJENO", statistical_region),
    type_of_waste = clear_waste_name(type_of_waste)
  )

# Export for the visualization interface
write_csv(combined_treatment_data, "Interface/Visualization/data/trt_treatment_combined.csv")

# Total waste entering treatment per year and waste type, excluding rows whose
# region is "NEOPREDELJENO". `.groups = "drop"` returns an ungrouped table
# instead of one that silently stays grouped by `year`.
aggregated_data <- combined_treatment_data |>
  filter(statistical_region != "NEOPREDELJENO") |>
  group_by(year, type_of_waste) |>
  summarize(
    total_waste = sum(waste_entering_treatment_process, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# Line chart of treated waste over time, one coloured line per waste type.
# Line thickness is set with `linewidth`; using `size` for lines is deprecated
# since ggplot2 3.4.0.
p <- ggplot(aggregated_data, aes(x = year, y = total_waste, color = type_of_waste, group = type_of_waste)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Waste Treatment Over Time by Waste Type",
       x = "Year",
       y = "Total Waste Entering Treatment Process",
       color = "Type of Waste") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, face = "bold"))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# Render the treatment chart as an interactive plotly widget
ggplotly(p, width = 1000, height = 800)

Waste Generated - ON HOLD

# Load the 2016 generated-and-untreated waste table (section is on hold)
waste.generated.2016.data <- read_csv("2016 data/2016 filtered/treatment/waste_generated_and_untreated_16.csv")
Rows: 36 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_generated_untreated, waste_generated_to_processin...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2017 generated-and-untreated waste table (section is on hold)
waste.generated.2017.data <- read_csv("2017 data/2017 filtered/treatment/waste_generated_and_untreated_17.csv")
Rows: 37 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_generated, waste_generated_collector_RS, waste_ge...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2018 waste-generated table (section is on hold)
waste.generated.2018.data <- read_csv("2018 data/2018 filtered/treatment/waste_generated_18.csv")
Rows: 37 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_generated, delivered_to_collector_RS, delivered_t...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2019 table of waste generated in the treatment process (section is on hold)
waste.generated.2019.data <- read_csv("2019 data/2019 filtered/treatment/waste_generated_in_treatment_process_19.csv")
Rows: 34 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, waste_recovered_and_given_away, waste_recovered_and_given_awa...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2020 table on management of waste from treatment operations and
# untreated waste (section is on hold)
waste.generated.2020.data <- read_csv("2020 data/2020 filtered/treatment/management_of_waste_from_treatment_operations_and_untreated_waste_20.csv")
Rows: 2 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): input_waste_name, output_waste_name
dbl (5): year, waste_handed_to_collectors_RS, waste_delivered_to_operators_R...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2021 table on management of waste from treatment operations and
# untreated waste (section is on hold)
waste.generated.2021.data <- read_csv("2021 data/2021 filtered/treatment/management_of_waste_from_treatment_operations_and_untreated_waste_21.csv")
Rows: 41 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, input_waste_name, output_waste_name
dbl (5): year, waste_handed_to_collectors_RS, waste_delivered_to_operators_R...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Municipal Waste Received

# Load the 2018 municipal-waste-received table from the filtered treatment data
municipal.waste.received.2018.data <- read_csv("2018 data/2018 filtered/treatment/municipal_waste_received_18.csv")
Rows: 123 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): name_of_company, statistical_region, name_of_municipality, type_of_...
dbl (2): year, total_waste_received

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2019 table of municipal waste received for treatment
municipal.waste.received.2019.data <- read_csv("2019 data/2019 filtered/treatment/municipal_waste_received_for_treatment_19.csv")
Rows: 91 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, total_waste_received

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2020 table of municipal waste received for treatment, separated by
# municipality of origin
municipal.waste.received.2020.data <- read_csv("2020 data/2020 filtered/treatment/municipal_waste_received_for_treatment,_separated_by_municipality_of_origin_20.csv")
Rows: 158 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, waste_collected_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the 2021 table of municipal waste received for treatment, separated by
# municipality of origin; its column names are later used as the common header
municipal.waste.received.2021.data <- read_csv("2021 data/2021 filtered/treatment/municipal_waste_received_for_treatment,_separated_by_municipality_of_origin_21.csv")
Rows: 164 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, waste_collected_by_municipality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The 2022 file has no type_of_waste column, so it cannot be compared with the
# previous years: it is loaded here but left out of the combined data set.
municipal.waste.received.2022.data <- read_csv("2022 data/2022 filtered/treatment/mbt_municipal_origin_22.csv")
Rows: 197 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, name_of_municipality
dbl (2): year, mixed_waste_mech_bio_treatment

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The 2018 file is reported per company: sum over companies so that it has one
# row per year / region / municipality / waste type, like the later years.
# `.groups = "drop"` returns an ungrouped table directly.
municipal.waste.received.2018.data <- municipal.waste.received.2018.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarize(
    total_waste_received = sum(total_waste_received, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# Rename the 2018 and 2019 tables by position to the 2021 column names
names(municipal.waste.received.2018.data) <- names(municipal.waste.received.2021.data)
names(municipal.waste.received.2019.data) <- names(municipal.waste.received.2021.data)

# Stack 2018-2021 and normalise the waste-type labels with clear_waste_name()
combined_municipal_waste_received_data <- bind_rows(
  municipal.waste.received.2018.data,
  municipal.waste.received.2019.data,
  municipal.waste.received.2020.data,
  municipal.waste.received.2021.data
) |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# Export for the visualization interface
write_csv(combined_municipal_waste_received_data, "Interface/Visualization/data/trt_municipal_waste_received_combined.csv")

colnames(combined_municipal_waste_received_data)
[1] "year"                            "statistical_region"             
[3] "name_of_municipality"            "type_of_waste"                  
[5] "waste_collected_by_municipality"
# Column Content:
# 1. Year
# 2. Statistical Region
# 3. Name of Municipality
# 4. Type of Waste
# 5. Total Waste Received (column `waste_collected_by_municipality`)

# Mean of `waste_collected_by_municipality` per region and municipality, taken
# over all of that municipality's rows (every year and waste type).
# `.groups = "drop"` returns an ungrouped table directly.
waste_by_municipality <- combined_municipal_waste_received_data |>
  group_by(statistical_region, name_of_municipality) |>
  summarise(
    avg_waste = mean(waste_collected_by_municipality, na.rm = TRUE),
    .groups = "drop"
  ) |>
  # Strip spaces from the municipality names
  # (presumably to match the map's NAME_2 spelling - TODO confirm)
  mutate(name_of_municipality = str_replace_all(name_of_municipality, " ", ""))
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Read the municipality polygons from map.geojson (path is relative to the
# working directory); sf::st_read() prints the layer summary on load.
slovenia_map <- sf::st_read("map.geojson") 
Reading layer `map' from data source 
  `C:\Users\kovac\Desktop\Work\Green UP Project\green-up-project\map.geojson' 
  using driver `GeoJSON'
Simple feature collection with 192 features and 13 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 13.3821 ymin: 45.4283 xmax: 16.5843 ymax: 46.8782
Geodetic CRS:  WGS 84
# Upper-case the NAME_1 and NAME_2 labels of the map before joining on NAME_2
slovenia_map <- mutate(
  slovenia_map,
  across(c(NAME_1, NAME_2), str_to_upper)
)

# Attach the per-municipality averages to the map polygons; polygons without
# a matching municipality name keep NA in the joined columns.
slovenia_map_with_data <- left_join(
  slovenia_map,
  waste_by_municipality,
  by = c("NAME_2" = "name_of_municipality")
)

# Count the map rows whose statistical_region is not NA after the join
print(sum(!is.na(slovenia_map_with_data[["statistical_region"]])))
[1] 117
# List the municipality names from waste_by_municipality that do not occur in
# the NAME_2 column of the joined map data
print(
  unique(
    waste_by_municipality$name_of_municipality[
      !waste_by_municipality$name_of_municipality %in%
        slovenia_map_with_data$NAME_2
    ]
  )
)
 [1] "KOČEVJE"                 "ČRNOMELJ"               
 [3] "ANKARAN"                 "DIVAČA"                 
 [5] "IVANČNAGORICA"           "HOČE-SLIVNICA"          
 [7] "KIDRIČEVO"               "MAKOLE"                 
 [9] "POLJČANE"                "RAČE-FRAM"              
[11] "SV.TROJICAVSLOV.GORICAH" "SVETIJURIJVSLOV.GORICAH"
[13] "SVETIJURIJOBŠČAVNICI"    "KOSTANJEVICANAKRKI"     
[15] "RADEČE"                  "BRASLOVČE"              
[17] "PODČETRTEK"              "ZREČE"                  
[19] "ŠENTJUR"                
# Where the join left statistical_region empty (NA), fall back to the map's
# own region label in NAME_1
slovenia_map_with_data <- mutate(
  slovenia_map_with_data,
  statistical_region = coalesce(statistical_region, NAME_1)
)

# Choropleth of the average waste per municipality. geom_sf() does not draw
# the `text` aesthetic (ggplot2 warns that it is unknown); it carries the hover
# label that the later ggplotly() call requests with tooltip = "text".
static_map <- ggplot(data = slovenia_map_with_data) +
  geom_sf(
    mapping = aes(
      fill = avg_waste,
      text = paste(
        "Municipality:", NAME_2,
        "<br>",
        ifelse(
          is.na(avg_waste),
          "No data available",
          paste("Average Waste:", round(avg_waste, 2))
        )
      )
    )
  ) +
  scale_fill_viridis(
    name = "Average Waste\n(2018-2021)",
    option = "plasma",
    labels = scales::comma,
    na.value = "grey80" # fill used for municipalities without data
  ) +
  labs(title = "Average Municipal Waste Collected in Slovenia (2018-2021)") +
  theme_minimal() +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
# Turn the static map into a plotly widget that shows only the `text` label
# on hover, and style the hover box
interactive_map <- layout(
  ggplotly(static_map, tooltip = "text", width = 1000, height = 800),
  hoverlabel = list(
    bgcolor = "white",
    font = list(family = "Arial", size = 12)
  )
)

# Display the interactive map
interactive_map
# Trend analysis by municipality: total waste per year and municipality,
# drawn as one line per municipality.
# `.groups = "drop"` makes the summarised table explicitly ungrouped and
# avoids the summarise() regrouping message; the plot itself is unchanged.
trend_by_municipality <- combined_municipal_waste_received_data |>
  group_by(year, name_of_municipality) |>
  summarise(
    total_waste = sum(waste_collected_by_municipality, na.rm = TRUE),
    .groups = "drop"
  ) |>
  ggplot(aes(x = year, y = total_waste, color = name_of_municipality)) +
  geom_line() +
  labs(title = "Waste Collected Over Time by Municipality",
       x = "Year", y = "Waste Collected")
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# Regional comparison: total waste per statistical region over all rows of the
# combined data, drawn as bars ordered from the largest to the smallest total
region_comparison <- combined_municipal_waste_received_data |>
  group_by(statistical_region) |>
  summarise(
    total_waste = sum(waste_collected_by_municipality, na.rm = TRUE)
  ) |>
  ggplot(
    aes(
      x = reorder(statistical_region, -total_waste),
      y = total_waste,
      fill = statistical_region
    )
  ) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Total Waste Collected by Region",
    x = "Statistical Region",
    y = "Waste Collected"
  )

# Distinct waste types present in the combined data, listed before building
# one column per type
distinct_waste_types <- distinct(
  combined_municipal_waste_received_data,
  type_of_waste
)

# Paper/Cardboard Packaging
# Other Wood Waste
# Wooden packaging

# One column per waste type: the row's amount where the type matches, else 0.
# NOTE(review): the list above spells "Wooden packaging" while the comparison
# below uses "Wooden Packaging" -- confirm which spelling the data contains.
combined_municipal_waste_received_data <- mutate(
  combined_municipal_waste_received_data,
  paper_cardboard_packaging = ifelse(
    type_of_waste == "Paper/Cardboard Packaging",
    waste_collected_by_municipality,
    0
  ),
  other_wood_waste = ifelse(
    type_of_waste == "Other Wood Waste",
    waste_collected_by_municipality,
    0
  ),
  wooden_packaging = ifelse(
    type_of_waste == "Wooden Packaging",
    waste_collected_by_municipality,
    0
  )
)

# Relationship between waste types: sum each per-type column by municipality,
# then plot paper/cardboard packaging against other wood waste with a linear
# fit. wooden_packaging is summed as well but is not used in this plot.
correlation_analysis <- combined_municipal_waste_received_data |>
  group_by(name_of_municipality) |>
  summarise(
    across(
      c(paper_cardboard_packaging, other_wood_waste, wooden_packaging),
      \(amount) sum(amount, na.rm = TRUE)
    )
  ) |>
  ggplot(aes(x = paper_cardboard_packaging, y = other_wood_waste)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Correlation between Paper/Cardboard Packaging and Other Wood Waste",
    x = "Paper/Cardboard Packaging",
    y = "Other Wood Waste"
  )

# Render the three ggplot objects as interactive plotly widgets. Each call is
# a top-level expression so that its result is auto-printed into the report.
ggplotly(trend_by_municipality, width = 1000, height = 800)
ggplotly(region_comparison, width = 1000, height = 800)
ggplotly(correlation_analysis, width = 1000, height = 800)
`geom_smooth()` using formula = 'y ~ x'

Difference in Waste Input and Treatment Process

# 2018 mass difference between waste input and treatment. Parsed columns at
# render time: name_of_company, type_of_waste, year, lost_mass.
waste.input.treatment.2018.data <- read_csv("2018 data/2018 filtered/treatment/difference_in_waste_input_and_treatment_process_18.csv")
Rows: 5 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (2): year, lost_mass

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2019 mass difference between waste input and treatment. Parsed columns at
# render time: name_of_company, type_of_waste, year, lost_mass.
waste.input.treatment.2019.data <- read_csv("2019 data/2019 filtered/treatment/difference_in_mass_between_waste_input_and_treatment_process_19.csv")
Rows: 6 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (2): year, lost_mass

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2020 mass difference file. Parsed columns at render time: input_waste_name,
# year, mass_change (no company column, unlike the 2018 and 2019 files).
waste.input.treatment.2020.data <- read_csv("2020 data/2020 filtered/treatment/mass_difference_between_waste_input_and_treatment_product_volumes_due_to_mass_change_during_the_treatment_process_20.csv")
Rows: 4 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): input_waste_name
dbl (2): year, mass_change

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2021 mass difference file. Parsed columns at render time: input_waste_name,
# year, mass_change (same layout as the 2020 file).
waste.input.treatment.2021.data <- read_csv("2021 data/2021 filtered/treatment/mass_difference_between_waste_input_and_treatment_product_volumes_due_to_mass_change_during_the_treatment_process_21.csv")
Rows: 3 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): input_waste_name
dbl (2): year, mass_change

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2022 mass difference file. Parsed columns at render time: type_of_waste,
# treatment_operation, year, reduction_or_increase_of_mass_during_treatment.
# This is the only year in this section that has a treatment_operation column.
waste.input.treatment.2022.data <- read_csv("2022 data/2022 filtered/treatment/mass_diff_22.csv")
Rows: 7 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): type_of_waste, treatment_operation
dbl (2): year, reduction_or_increase_of_mass_during_treatment

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Harmonise the yearly tables on `mass_change` and `type_of_waste`:
# 2018/2019 name the value column lost_mass, 2020/2021 name the waste column
# input_waste_name, and 2022 uses a long name for the value column.
waste.input.treatment.2018.data <- rename(
  waste.input.treatment.2018.data,
  mass_change = lost_mass
)
waste.input.treatment.2019.data <- rename(
  waste.input.treatment.2019.data,
  mass_change = lost_mass
)
waste.input.treatment.2020.data <- rename(
  waste.input.treatment.2020.data,
  type_of_waste = input_waste_name
)
waste.input.treatment.2021.data <- rename(
  waste.input.treatment.2021.data,
  type_of_waste = input_waste_name
)
waste.input.treatment.2022.data <- rename(
  waste.input.treatment.2022.data,
  mass_change = reduction_or_increase_of_mass_during_treatment
)

# Stack all years (columns missing in a given year are filled with NA by
# bind_rows) and pass the waste type labels through clear_waste_name(),
# which is defined outside this section.
combined_input_treatment_data <- bind_rows(
  waste.input.treatment.2018.data,
  waste.input.treatment.2019.data,
  waste.input.treatment.2020.data,
  waste.input.treatment.2021.data,
  waste.input.treatment.2022.data
) |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# export
write_csv(
  combined_input_treatment_data,
  "Interface/Visualization/data/trt_input_treatment_combined.csv"
)

# Total mass change per year, waste type and treatment operation.
# treatment_operation is only present in the 2022 file, so earlier years form
# groups with treatment_operation = NA.
# `.groups = "drop"` returns an ungrouped tibble directly, which replaces the
# trailing ungroup() and avoids the summarise() regrouping message.
aggregated_data <- combined_input_treatment_data |>
  group_by(year, type_of_waste, treatment_operation) |>
  summarize(
    total_mass_change = sum(mass_change, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'year', 'type_of_waste'. You can override
using the `.groups` argument.
# Print a skimr summary (missingness and distribution per column) of the
# aggregated table
skim(aggregated_data)
Data summary
Name aggregated_data
Number of rows 19
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
type_of_waste 0 1.00 4 25 0 5 0
treatment_operation 12 0.37 4 10 0 4 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 2020.53 1.43 2018.00 2019.50 2021.00 2022.00 2022.00 ▂▃▅▃▇
total_mass_change 0 1 55.28 238.94 -494.74 0.09 18.67 65.89 880.15 ▁▇▆▁▁
# Dodged bars of total mass change per year, filled by waste type, with one
# panel per treatment operation
p <- ggplot(
  aggregated_data,
  aes(x = year, y = total_mass_change, fill = type_of_waste)
) +
  geom_col(position = "dodge") +
  facet_wrap(~ treatment_operation) +
  labs(
    title = "Mass Change During Treatment Over the Years by Waste Type and Operation",
    x = "Year",
    y = "Total Mass Change"
  ) +
  theme_minimal()

ggplotly(p, width = 1000, height = 800)
# Same data with the roles swapped: bars filled by treatment operation, with
# one panel per waste type
p <- ggplot(
  aggregated_data,
  aes(x = year, y = total_mass_change, fill = treatment_operation)
) +
  geom_col(position = "dodge") +
  facet_wrap(~ type_of_waste) +
  labs(
    title = "Mass Change During Treatment Over the Years by Waste Type and Operation",
    x = "Year",
    y = "Total Mass Change"
  ) +
  theme_minimal()

ggplotly(p, width = 1000, height = 800)
# Single-panel variant: fill encodes the waste type and the bar outline colour
# encodes the treatment operation
p <- ggplot(
  aggregated_data,
  aes(
    x = year,
    y = total_mass_change,
    fill = type_of_waste,
    color = treatment_operation
  )
) +
  geom_col(position = "dodge") +
  labs(
    title = "Mass Change During Treatment Over the Years by Waste Type and Operation",
    x = "Year",
    y = "Total Mass Change"
  ) +
  theme_minimal()

ggplotly(p, width = 1000, height = 800)

Landfill Capacity - ON HOLD

# 2018 landfill capacity file (section on hold). The data is only loaded here;
# no further processing follows in this part of the report.
waste.landfill.capacity.2018.data <- read_csv("2018 data/2018 filtered/treatment/landfill_capacity_18.csv")
Rows: 19 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): name_of_company, landfill, start_date, end_date
dbl (7): year, filled_volume_start_year, filled_area_start_year, free_volume...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Landfill Quantities Management - ON HOLD

# NOTE(review): the object name says 2019, but the file read is the 2017
# extract ("2017 data/...", suffix `_17`), and the columns parsed at render
# time contain no `year` column. Confirm the intended year, then either rename
# the object to match the 2017 data or point the path at the 2019 file.
waste.landfill.quantities.2019.data <- read_csv("2017 data/2017 filtered/treatment/total_waste_in_landfill_17.csv")
Rows: 2 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_company, landfill, type_of_waste
dbl (2): total_non_hazardous_waste_received, total_deposited

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2018 landfill quantities file (section on hold). Parsed columns at render
# time: name_of_company, landfill, type_of_waste, year,
# total_non_hazardous_waste_received, disposed.
waste.landfill.quantities.2018.data <- read_csv("2018 data/2018 filtered/treatment/landfill_quantities_management_18.csv")
Rows: 1 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_company, landfill, type_of_waste
dbl (3): year, total_non_hazardous_waste_received, disposed

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.